| field | value | date |
| --- | --- | --- |
| author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
| commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
| tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Transforms/InstCombine | |
| parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
| download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/InstCombine')
17 files changed, 3325 insertions, 3325 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAddSub.cpp index bacb868989..e289e69efd 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -29,7 +29,7 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include <cassert> #include <utility> @@ -82,11 +82,11 @@ namespace { private: bool insaneIntVal(int V) { return V > 4 || V < -4; } - APFloat *getFpValPtr() { return reinterpret_cast<APFloat *>(&FpValBuf); } + APFloat *getFpValPtr() { return reinterpret_cast<APFloat *>(&FpValBuf); } - const APFloat *getFpValPtr() const { - return reinterpret_cast<const APFloat *>(&FpValBuf); - } + const APFloat *getFpValPtr() const { + return reinterpret_cast<const APFloat *>(&FpValBuf); + } const APFloat &getFpVal() const { assert(IsFp && BufHasFpVal && "Incorret state"); @@ -861,7 +861,7 @@ static Instruction *foldNoWrapAdd(BinaryOperator &Add, return nullptr; } -Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { +Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1); Constant *Op1C; if (!match(Op1, m_Constant(Op1C))) @@ -887,15 +887,15 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // zext(bool) + C -> bool ? C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1); + return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1); // sext(bool) + C -> bool ? C - 1 : C if (match(Op0, m_SExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1); + return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1); // ~X + C --> (C-1) - X if (match(Op0, m_Not(m_Value(X)))) - return BinaryOperator::CreateSub(InstCombiner::SubOne(Op1C), X); + return BinaryOperator::CreateSub(InstCombiner::SubOne(Op1C), X); const APInt *C; if (!match(Op1, m_APInt(C))) @@ -924,39 +924,39 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C) return CastInst::Create(Instruction::SExt, X, Ty); - if (match(Op0, m_Xor(m_Value(X), m_APInt(C2)))) { - // (X ^ signmask) + C --> (X + (signmask ^ C)) - if (C2->isSignMask()) - return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C2 ^ *C)); - - // If X has no high-bits set above an xor mask: - // add (xor X, LowMaskC), C --> sub (LowMaskC + C), X - if (C2->isMask()) { - KnownBits LHSKnown = computeKnownBits(X, 0, &Add); - if ((*C2 | LHSKnown.Zero).isAllOnesValue()) - return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X); - } - - // Look for a math+logic pattern that corresponds to sext-in-register of a - // value with cleared high bits. 
Convert that into a pair of shifts: - // add (xor X, 0x80), 0xF..F80 --> (X << ShAmtC) >>s ShAmtC - // add (xor X, 0xF..F80), 0x80 --> (X << ShAmtC) >>s ShAmtC - if (Op0->hasOneUse() && *C2 == -(*C)) { - unsigned BitWidth = Ty->getScalarSizeInBits(); - unsigned ShAmt = 0; - if (C->isPowerOf2()) - ShAmt = BitWidth - C->logBase2() - 1; - else if (C2->isPowerOf2()) - ShAmt = BitWidth - C2->logBase2() - 1; - if (ShAmt && MaskedValueIsZero(X, APInt::getHighBitsSet(BitWidth, ShAmt), - 0, &Add)) { - Constant *ShAmtC = ConstantInt::get(Ty, ShAmt); - Value *NewShl = Builder.CreateShl(X, ShAmtC, "sext"); - return BinaryOperator::CreateAShr(NewShl, ShAmtC); - } - } - } - + if (match(Op0, m_Xor(m_Value(X), m_APInt(C2)))) { + // (X ^ signmask) + C --> (X + (signmask ^ C)) + if (C2->isSignMask()) + return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C2 ^ *C)); + + // If X has no high-bits set above an xor mask: + // add (xor X, LowMaskC), C --> sub (LowMaskC + C), X + if (C2->isMask()) { + KnownBits LHSKnown = computeKnownBits(X, 0, &Add); + if ((*C2 | LHSKnown.Zero).isAllOnesValue()) + return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X); + } + + // Look for a math+logic pattern that corresponds to sext-in-register of a + // value with cleared high bits. Convert that into a pair of shifts: + // add (xor X, 0x80), 0xF..F80 --> (X << ShAmtC) >>s ShAmtC + // add (xor X, 0xF..F80), 0x80 --> (X << ShAmtC) >>s ShAmtC + if (Op0->hasOneUse() && *C2 == -(*C)) { + unsigned BitWidth = Ty->getScalarSizeInBits(); + unsigned ShAmt = 0; + if (C->isPowerOf2()) + ShAmt = BitWidth - C->logBase2() - 1; + else if (C2->isPowerOf2()) + ShAmt = BitWidth - C2->logBase2() - 1; + if (ShAmt && MaskedValueIsZero(X, APInt::getHighBitsSet(BitWidth, ShAmt), + 0, &Add)) { + Constant *ShAmtC = ConstantInt::get(Ty, ShAmt); + Value *NewShl = Builder.CreateShl(X, ShAmtC, "sext"); + return BinaryOperator::CreateAShr(NewShl, ShAmtC); + } + } + } + if (C->isOneValue() && Op0->hasOneUse()) { // add (sext i1 X), 1 --> zext (not X) // TODO: The smallest IR representation is (select X, 0, 1), and that would @@ -977,15 +977,15 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { } } - // If all bits affected by the add are included in a high-bit-mask, do the - // add before the mask op: - // (X & 0xFF00) + xx00 --> (X + xx00) & 0xFF00 - if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C2)))) && - C2->isNegative() && C2->isShiftedMask() && *C == (*C & *C2)) { - Value *NewAdd = Builder.CreateAdd(X, ConstantInt::get(Ty, *C)); - return BinaryOperator::CreateAnd(NewAdd, ConstantInt::get(Ty, *C2)); - } - + // If all bits affected by the add are included in a high-bit-mask, do the + // add before the mask op: + // (X & 0xFF00) + xx00 --> (X + xx00) & 0xFF00 + if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C2)))) && + C2->isNegative() && C2->isShiftedMask() && *C == (*C & *C2)) { + Value *NewAdd = Builder.CreateAdd(X, ConstantInt::get(Ty, *C)); + return BinaryOperator::CreateAnd(NewAdd, ConstantInt::get(Ty, *C2)); + } + return nullptr; } @@ -1064,7 +1064,7 @@ static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) { // Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1) // does not overflow. 
-Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) { +Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Value *X, *MulOpV; APInt C0, MulOpC; @@ -1140,9 +1140,9 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl:: - canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( - BinaryOperator &I) { +Instruction *InstCombinerImpl:: + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I) { assert((I.getOpcode() == Instruction::Add || I.getOpcode() == Instruction::Or || I.getOpcode() == Instruction::Sub) && @@ -1241,44 +1241,44 @@ Instruction *InstCombinerImpl:: return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType()); } -/// This is a specialization of a more general transform from -/// SimplifyUsingDistributiveLaws. If that code can be made to work optimally -/// for multi-use cases or propagating nsw/nuw, then we would not need this. -static Instruction *factorizeMathWithShlOps(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { - // TODO: Also handle mul by doubling the shift amount? - assert((I.getOpcode() == Instruction::Add || - I.getOpcode() == Instruction::Sub) && - "Expected add/sub"); - auto *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); - auto *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); - if (!Op0 || !Op1 || !(Op0->hasOneUse() || Op1->hasOneUse())) - return nullptr; - - Value *X, *Y, *ShAmt; - if (!match(Op0, m_Shl(m_Value(X), m_Value(ShAmt))) || - !match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt)))) - return nullptr; - - // No-wrap propagates only when all ops have no-wrap. - bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() && - Op1->hasNoSignedWrap(); - bool HasNUW = I.hasNoUnsignedWrap() && Op0->hasNoUnsignedWrap() && - Op1->hasNoUnsignedWrap(); - - // add/sub (X << ShAmt), (Y << ShAmt) --> (add/sub X, Y) << ShAmt - Value *NewMath = Builder.CreateBinOp(I.getOpcode(), X, Y); - if (auto *NewI = dyn_cast<BinaryOperator>(NewMath)) { - NewI->setHasNoSignedWrap(HasNSW); - NewI->setHasNoUnsignedWrap(HasNUW); - } - auto *NewShl = BinaryOperator::CreateShl(NewMath, ShAmt); - NewShl->setHasNoSignedWrap(HasNSW); - NewShl->setHasNoUnsignedWrap(HasNUW); - return NewShl; -} - -Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { +/// This is a specialization of a more general transform from +/// SimplifyUsingDistributiveLaws. If that code can be made to work optimally +/// for multi-use cases or propagating nsw/nuw, then we would not need this. +static Instruction *factorizeMathWithShlOps(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + // TODO: Also handle mul by doubling the shift amount? + assert((I.getOpcode() == Instruction::Add || + I.getOpcode() == Instruction::Sub) && + "Expected add/sub"); + auto *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); + auto *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); + if (!Op0 || !Op1 || !(Op0->hasOneUse() || Op1->hasOneUse())) + return nullptr; + + Value *X, *Y, *ShAmt; + if (!match(Op0, m_Shl(m_Value(X), m_Value(ShAmt))) || + !match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt)))) + return nullptr; + + // No-wrap propagates only when all ops have no-wrap. 
+ bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() && + Op1->hasNoSignedWrap(); + bool HasNUW = I.hasNoUnsignedWrap() && Op0->hasNoUnsignedWrap() && + Op1->hasNoUnsignedWrap(); + + // add/sub (X << ShAmt), (Y << ShAmt) --> (add/sub X, Y) << ShAmt + Value *NewMath = Builder.CreateBinOp(I.getOpcode(), X, Y); + if (auto *NewI = dyn_cast<BinaryOperator>(NewMath)) { + NewI->setHasNoSignedWrap(HasNSW); + NewI->setHasNoUnsignedWrap(HasNUW); + } + auto *NewShl = BinaryOperator::CreateShl(NewMath, ShAmt); + NewShl->setHasNoSignedWrap(HasNSW); + NewShl->setHasNoUnsignedWrap(HasNUW); + return NewShl; +} + +Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), SQ.getWithInstruction(&I))) @@ -1294,9 +1294,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Instruction *R = factorizeMathWithShlOps(I, Builder)) - return R; - + if (Instruction *R = factorizeMathWithShlOps(I, Builder)) + return R; + if (Instruction *X = foldAddWithConstant(I)) return X; @@ -1434,14 +1434,14 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I)) return SatAdd; - // usub.sat(A, B) + B => umax(A, B) - if (match(&I, m_c_BinOp( - m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Value(A), m_Value(B))), - m_Deferred(B)))) { - return replaceInstUsesWith(I, - Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B})); - } - + // usub.sat(A, B) + B => umax(A, B) + if (match(&I, m_c_BinOp( + m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Value(A), m_Value(B))), + m_Deferred(B)))) { + return replaceInstUsesWith(I, + Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B})); + } + return Changed ? &I : nullptr; } @@ -1504,7 +1504,7 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I, : BinaryOperator::CreateFDivFMF(XY, Z, &I); } -Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) @@ -1618,27 +1618,27 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { /// Optimize pointer differences into the same array into a size. Consider: /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. -Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, - Type *Ty, bool IsNUW) { +Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, + Type *Ty, bool IsNUW) { // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; - if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) { - std::swap(LHS, RHS); - Swapped = true; - } + if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) { + std::swap(LHS, RHS); + Swapped = true; + } - // Require at least one GEP with a common base pointer on both sides. - if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) { + // Require at least one GEP with a common base pointer on both sides. + if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) { // (gep X, ...) 
- X if (LHSGEP->getOperand(0) == RHS) { GEP1 = LHSGEP; - } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) { + } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) { // (gep X, ...) - (gep X, ...) if (LHSGEP->getOperand(0)->stripPointerCasts() == - RHSGEP->getOperand(0)->stripPointerCasts()) { - GEP1 = LHSGEP; + RHSGEP->getOperand(0)->stripPointerCasts()) { + GEP1 = LHSGEP; GEP2 = RHSGEP; } } @@ -1672,18 +1672,18 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, Value *Result = EmitGEPOffset(GEP1); // If this is a single inbounds GEP and the original sub was nuw, - // then the final multiplication is also nuw. - if (auto *I = dyn_cast<Instruction>(Result)) - if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() && - I->getOpcode() == Instruction::Mul) - I->setHasNoUnsignedWrap(); - - // If we have a 2nd GEP of the same base pointer, subtract the offsets. - // If both GEPs are inbounds, then the subtract does not have signed overflow. + // then the final multiplication is also nuw. + if (auto *I = dyn_cast<Instruction>(Result)) + if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() && + I->getOpcode() == Instruction::Mul) + I->setHasNoUnsignedWrap(); + + // If we have a 2nd GEP of the same base pointer, subtract the offsets. + // If both GEPs are inbounds, then the subtract does not have signed overflow. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false, - GEP1->isInBounds() && GEP2->isInBounds()); + Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false, + GEP1->isInBounds() && GEP2->isInBounds()); } // If we have p - gep(p, ...) then we have to negate the result. @@ -1693,7 +1693,7 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, return Builder.CreateIntCast(Result, Ty, true); } -Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { if (Value *V = SimplifySubInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), SQ.getWithInstruction(&I))) @@ -1722,19 +1722,19 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return Res; } - // Try this before Negator to preserve NSW flag. - if (Instruction *R = factorizeMathWithShlOps(I, Builder)) - return R; - - if (Constant *C = dyn_cast<Constant>(Op0)) { - Value *X; - Constant *C2; - - // C-(X+C2) --> (C-C2)-X - if (match(Op1, m_Add(m_Value(X), m_Constant(C2)))) - return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); - } - + // Try this before Negator to preserve NSW flag. 
+ if (Instruction *R = factorizeMathWithShlOps(I, Builder)) + return R; + + if (Constant *C = dyn_cast<Constant>(Op0)) { + Value *X; + Constant *C2; + + // C-(X+C2) --> (C-C2)-X + if (match(Op1, m_Add(m_Value(X), m_Constant(C2)))) + return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); + } + auto TryToNarrowDeduceFlags = [this, &I, &Op0, &Op1]() -> Instruction * { if (Instruction *Ext = narrowMathIfNoOverflow(I)) return Ext; @@ -1802,7 +1802,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { } auto m_AddRdx = [](Value *&Vec) { - return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec))); + return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec))); }; Value *V0, *V1; if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) && @@ -1810,8 +1810,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { // Difference of sums is sum of differences: // add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1) Value *Sub = Builder.CreateSub(V0, V1); - Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add, - {Sub->getType()}, {Sub}); + Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add, + {Sub->getType()}, {Sub}); return replaceInstUsesWith(I, Rdx); } @@ -1819,14 +1819,14 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { Value *X; if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) // C - (zext bool) --> bool ? C - 1 : C - return SelectInst::Create(X, InstCombiner::SubOne(C), C); + return SelectInst::Create(X, InstCombiner::SubOne(C), C); if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) // C - (sext bool) --> bool ? C + 1 : C - return SelectInst::Create(X, InstCombiner::AddOne(C), C); + return SelectInst::Create(X, InstCombiner::AddOne(C), C); // C - ~X == X + (1+C) if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::CreateAdd(X, InstCombiner::AddOne(C)); + return BinaryOperator::CreateAdd(X, InstCombiner::AddOne(C)); // Try to fold constant sub into select arguments. 
if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) @@ -1841,7 +1841,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { Constant *C2; // C-(C2-X) --> X+(C-C2) - if (match(Op1, m_Sub(m_ImmConstant(C2), m_Value(X)))) + if (match(Op1, m_Sub(m_ImmConstant(C2), m_Value(X)))) return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2)); } @@ -1873,22 +1873,22 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return BinaryOperator::CreateXor(A, B); } - // (sub (add A, B) (or A, B)) --> (and A, B) - { - Value *A, *B; - if (match(Op0, m_Add(m_Value(A), m_Value(B))) && - match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateAnd(A, B); - } - - // (sub (add A, B) (and A, B)) --> (or A, B) - { - Value *A, *B; - if (match(Op0, m_Add(m_Value(A), m_Value(B))) && - match(Op1, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateOr(A, B); - } - + // (sub (add A, B) (or A, B)) --> (and A, B) + { + Value *A, *B; + if (match(Op0, m_Add(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) + return BinaryOperator::CreateAnd(A, B); + } + + // (sub (add A, B) (and A, B)) --> (or A, B) + { + Value *A, *B; + if (match(Op0, m_Add(m_Value(A), m_Value(B))) && + match(Op1, m_c_And(m_Specific(A), m_Specific(B)))) + return BinaryOperator::CreateOr(A, B); + } + // (sub (and A, B) (or A, B)) --> neg (xor A, B) { Value *A, *B; @@ -2067,20 +2067,20 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return SelectInst::Create(Cmp, Neg, A); } - // If we are subtracting a low-bit masked subset of some value from an add - // of that same value with no low bits changed, that is clearing some low bits - // of the sum: - // sub (X + AddC), (X & AndC) --> and (X + AddC), ~AndC - const APInt *AddC, *AndC; - if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) && - match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) { - unsigned BitWidth = Ty->getScalarSizeInBits(); - unsigned Cttz = AddC->countTrailingZeros(); - APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz)); - if ((HighMask & *AndC).isNullValue()) - return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC))); - } - + // If we are subtracting a low-bit masked subset of some value from an add + // of that same value with no low bits changed, that is clearing some low bits + // of the sum: + // sub (X + AddC), (X & AndC) --> and (X + AddC), ~AndC + const APInt *AddC, *AndC; + if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) && + match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) { + unsigned BitWidth = Ty->getScalarSizeInBits(); + unsigned Cttz = AddC->countTrailingZeros(); + APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz)); + if ((HighMask & *AndC).isNullValue()) + return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC))); + } + if (Instruction *V = canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) return V; @@ -2133,11 +2133,11 @@ static Instruction *hoistFNegAboveFMulFDiv(Instruction &I, return nullptr; } -Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { +Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { Value *Op = I.getOperand(0); if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(), - getSimplifyQuery().getWithInstruction(&I))) + getSimplifyQuery().getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *X = foldFNegIntoConstant(I)) @@ -2156,10 +2156,10 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { return nullptr; } -Instruction 
*InstCombinerImpl::visitFSub(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), - getSimplifyQuery().getWithInstruction(&I))) + getSimplifyQuery().getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *X = foldVectorBinop(I)) @@ -2214,7 +2214,7 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { // X - C --> X + (-C) // But don't transform constant expressions because there's an inverse fold // for X + (-Y) --> X - Y. - if (match(Op1, m_ImmConstant(C))) + if (match(Op1, m_ImmConstant(C))) return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I); // X - (-Y) --> X + Y @@ -2283,8 +2283,8 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { } auto m_FaddRdx = [](Value *&Sum, Value *&Vec) { - return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(m_Value(Sum), - m_Value(Vec))); + return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(m_Value(Sum), + m_Value(Vec))); }; Value *A0, *A1, *V0, *V1; if (match(Op0, m_FaddRdx(A0, V0)) && match(Op1, m_FaddRdx(A1, V1)) && @@ -2292,8 +2292,8 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { // Difference of sums is sum of differences: // add_rdx(A0, V0) - add_rdx(A1, V1) --> add_rdx(A0, V0 - V1) - A1 Value *Sub = Builder.CreateFSubFMF(V0, V1, &I); - Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd, - {Sub->getType()}, {A0, Sub}, &I); + Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd, + {Sub->getType()}, {A0, Sub}, &I); return BinaryOperator::CreateFSubFMF(Rdx, A1, &I); } diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 85a7abe211..7dea2acbf4 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -16,9 +16,9 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" -#include "llvm/Transforms/Utils/Local.h" - +#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/Utils/Local.h" + using namespace llvm; using namespace PatternMatch; @@ -117,9 +117,9 @@ static Value *SimplifyBSwap(BinaryOperator &I, /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates /// whether to treat V, Lo, and Hi as signed or not. -Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo, - const APInt &Hi, bool isSigned, - bool Inside) { +Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo, + const APInt &Hi, bool isSigned, + bool Inside) { assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) && "Lo is not < Hi in range emission code!"); @@ -394,10 +394,10 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros /// and the right hand side is of type BMask_Mixed. For example, /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8). 
-static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( - ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, - Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, - InstCombiner::BuilderTy &Builder) { +static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( + ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, + Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, + InstCombiner::BuilderTy &Builder) { // We are given the canonical form: // (icmp ne (A & B), 0) & (icmp eq (A & D), E). // where D & E == E. @@ -408,9 +408,9 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // // We currently handle the case of B, C, D, E are constant. // - ConstantInt *BCst, *CCst, *DCst, *ECst; - if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) || - !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst))) + ConstantInt *BCst, *CCst, *DCst, *ECst; + if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) || + !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst))) return nullptr; ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; @@ -516,9 +516,9 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side /// aren't of the common mask pattern type. static Value *foldLogOpOfMaskedICmpsAsymmetric( - ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, - Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, - unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) { + ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, + Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, + unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) { assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && "Expected equality predicates for masked type of icmps."); // Handle Mask_NotAllZeros-BMask_Mixed cases. @@ -549,7 +549,7 @@ static Value *foldLogOpOfMaskedICmpsAsymmetric( /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, - InstCombiner::BuilderTy &Builder) { + InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); Optional<std::pair<unsigned, unsigned>> MaskPair = @@ -619,8 +619,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // Remaining cases assume at least that B and D are constant, and depend on // their actual values. This isn't strictly necessary, just a "handle the // easy cases for now" decision. - ConstantInt *BCst, *DCst; - if (!match(B, m_ConstantInt(BCst)) || !match(D, m_ConstantInt(DCst))) + ConstantInt *BCst, *DCst; + if (!match(B, m_ConstantInt(BCst)) || !match(D, m_ConstantInt(DCst))) return nullptr; if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) { @@ -661,8 +661,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // We can't simply use C and E because we might actually handle // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set. 
- ConstantInt *CCst, *ECst; - if (!match(C, m_ConstantInt(CCst)) || !match(E, m_ConstantInt(ECst))) + ConstantInt *CCst, *ECst; + if (!match(C, m_ConstantInt(CCst)) || !match(E, m_ConstantInt(ECst))) return nullptr; if (PredL != NewCC) CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst)); @@ -688,8 +688,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n /// If \p Inverted is true then the check is for the inverted range, e.g. /// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n -Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, - bool Inverted) { +Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, + bool Inverted) { // Check the lower range comparison, e.g. x >= 0 // InstCombine already ensured that if there is a constant it's on the RHS. ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1)); @@ -796,9 +796,9 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) -Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, - ICmpInst *RHS, - BinaryOperator &Logic) { +Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, + ICmpInst *RHS, + BinaryOperator &Logic) { bool JoinedByAnd = Logic.getOpcode() == Instruction::And; assert((JoinedByAnd || Logic.getOpcode() == Instruction::Or) && "Wrong opcode"); @@ -810,8 +810,8 @@ Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ) return nullptr; - if (!match(LHS->getOperand(1), m_Zero()) || - !match(RHS->getOperand(1), m_Zero())) + if (!match(LHS->getOperand(1), m_Zero()) || + !match(RHS->getOperand(1), m_Zero())) return nullptr; Value *A, *B, *C, *D; @@ -1123,8 +1123,8 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, } /// Fold (icmp)&(icmp) if possible. -Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, - BinaryOperator &And) { +Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, + BinaryOperator &And) { const SimplifyQuery Q = SQ.getWithInstruction(&And); // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) @@ -1183,10 +1183,10 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); - - ConstantInt *LHSC, *RHSC; - if (!match(LHS->getOperand(1), m_ConstantInt(LHSC)) || - !match(RHS->getOperand(1), m_ConstantInt(RHSC))) + + ConstantInt *LHSC, *RHSC; + if (!match(LHS->getOperand(1), m_ConstantInt(LHSC)) || + !match(RHS->getOperand(1), m_ConstantInt(RHSC))) return nullptr; if (LHSC == RHSC && PredL == PredR) { @@ -1344,8 +1344,8 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, return nullptr; } -Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, - bool IsAnd) { +Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, + bool IsAnd) { Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); @@ -1455,8 +1455,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I, Value *A, *B; if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) && match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) && - !InstCombiner::isFreeToInvert(A, A->hasOneUse()) && - !InstCombiner::isFreeToInvert(B, B->hasOneUse())) { + !InstCombiner::isFreeToInvert(A, A->hasOneUse()) && + !InstCombiner::isFreeToInvert(B, B->hasOneUse())) { Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan"); return BinaryOperator::CreateNot(AndOr); } @@ -1464,7 +1464,7 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I, return nullptr; } -bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) { +bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) { Value *CastSrc = CI->getOperand(0); // Noop casts and casts of constants should be eliminated trivially. @@ -1520,7 +1520,7 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, } /// Fold {and,or,xor} (cast X), Y. -Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { +Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { auto LogicOpc = I.getOpcode(); assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding"); @@ -1627,14 +1627,14 @@ static Instruction *foldOrToXor(BinaryOperator &I, match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); - // Operand complexity canonicalization guarantees that the 'xor' is Op0. - // (A ^ B) | ~(A | B) --> ~(A & B) - // (A ^ B) | ~(B | A) --> ~(A & B) - if (Op0->hasOneUse() || Op1->hasOneUse()) - if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); - + // Operand complexity canonicalization guarantees that the 'xor' is Op0. + // (A ^ B) | ~(A | B) --> ~(A & B) + // (A ^ B) | ~(B | A) --> ~(A & B) + if (Op0->hasOneUse() || Op1->hasOneUse()) + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); + // (A & ~B) | (~A & B) --> A ^ B // (A & ~B) | (B & ~A) --> A ^ B // (~B & A) | (~A & B) --> A ^ B @@ -1649,13 +1649,13 @@ static Instruction *foldOrToXor(BinaryOperator &I, /// Return true if a constant shift amount is always less than the specified /// bit-width. If not, the shift could create poison in the narrower type. 
static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) { - APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); - return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); + APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); + return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); } /// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and /// a common zext operand: and (binop (zext X), C), (zext X). -Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) { +Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) { // This transform could also apply to {or, and, xor}, but there are better // folds for those cases, so we don't expect those patterns here. AShr is not // handled because it should always be transformed to LShr in this sequence. @@ -1697,9 +1697,9 @@ Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) { // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. -Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { - Type *Ty = I.getType(); - +Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { + Type *Ty = I.getType(); + if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1727,22 +1727,22 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - Value *X, *Y; - if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) && - match(Op1, m_One())) { - // (1 << X) & 1 --> zext(X == 0) - // (1 >> X) & 1 --> zext(X == 0) - Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, 0)); - return new ZExtInst(IsZero, Ty); - } - + + Value *X, *Y; + if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) && + match(Op1, m_One())) { + // (1 << X) & 1 --> zext(X == 0) + // (1 >> X) & 1 --> zext(X == 0) + Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, 0)); + return new ZExtInst(IsZero, Ty); + } + const APInt *C; if (match(Op1, m_APInt(C))) { const APInt *XorC; if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_APInt(XorC))))) { // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Constant *NewC = ConstantInt::get(Ty, *C & *XorC); + Constant *NewC = ConstantInt::get(Ty, *C & *XorC); Value *And = Builder.CreateAnd(X, Op1); And->takeName(Op0); return BinaryOperator::CreateXor(And, NewC); @@ -1757,9 +1757,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in // above, but this feels safer. APInt Together = *C & *OrC; - Value *And = Builder.CreateAnd(X, ConstantInt::get(Ty, Together ^ *C)); + Value *And = Builder.CreateAnd(X, ConstantInt::get(Ty, Together ^ *C)); And->takeName(Op0); - return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together)); + return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together)); } // If the mask is only needed on one incoming arm, push the 'and' op up. 
@@ -1780,49 +1780,49 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return BinaryOperator::Create(BinOp, NewLHS, Y); } } - - unsigned Width = Ty->getScalarSizeInBits(); + + unsigned Width = Ty->getScalarSizeInBits(); const APInt *ShiftC; if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) { if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) { // We are clearing high bits that were potentially set by sext+ashr: // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC - Value *Sext = Builder.CreateSExt(X, Ty); - Constant *ShAmtC = ConstantInt::get(Ty, ShiftC->zext(Width)); + Value *Sext = Builder.CreateSExt(X, Ty); + Constant *ShAmtC = ConstantInt::get(Ty, ShiftC->zext(Width)); return BinaryOperator::CreateLShr(Sext, ShAmtC); } } - - const APInt *AddC; - if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) { - // If we add zeros to every bit below a mask, the add has no effect: - // (X + AddC) & LowMaskC --> X & LowMaskC - unsigned Ctlz = C->countLeadingZeros(); - APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz)); - if ((*AddC & LowMask).isNullValue()) - return BinaryOperator::CreateAnd(X, Op1); - - // If we are masking the result of the add down to exactly one bit and - // the constant we are adding has no bits set below that bit, then the - // add is flipping a single bit. Example: - // (X + 4) & 4 --> (X & 4) ^ 4 - if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) { - assert((*C & *AddC) != 0 && "Expected common bit"); - Value *NewAnd = Builder.CreateAnd(X, Op1); - return BinaryOperator::CreateXor(NewAnd, Op1); - } - } - } - - ConstantInt *AndRHS; - if (match(Op1, m_ConstantInt(AndRHS))) { + + const APInt *AddC; + if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) { + // If we add zeros to every bit below a mask, the add has no effect: + // (X + AddC) & LowMaskC --> X & LowMaskC + unsigned Ctlz = C->countLeadingZeros(); + APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz)); + if ((*AddC & LowMask).isNullValue()) + return BinaryOperator::CreateAnd(X, Op1); + + // If we are masking the result of the add down to exactly one bit and + // the constant we are adding has no bits set below that bit, then the + // add is flipping a single bit. Example: + // (X + 4) & 4 --> (X & 4) ^ 4 + if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) { + assert((*C & *AddC) != 0 && "Expected common bit"); + Value *NewAnd = Builder.CreateAnd(X, Op1); + return BinaryOperator::CreateXor(NewAnd, Op1); + } + } + } + + ConstantInt *AndRHS; + if (match(Op1, m_ConstantInt(AndRHS))) { const APInt &AndRHSMask = AndRHS->getValue(); // Optimize a variety of ((val OP C1) & C2) combinations... if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth // of X and OP behaves well when given trunc(C1) and X. - // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt. + // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt. 
switch (Op0I->getOpcode()) { default: break; @@ -1847,30 +1847,30 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X); auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType()); auto *And = Builder.CreateAnd(BinOp, TruncC2); - return new ZExtInst(And, Ty); + return new ZExtInst(And, Ty); } } } } - } - - if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))), - m_SignMask())) && - match(Y, m_SpecificInt_ICMP( - ICmpInst::Predicate::ICMP_EQ, - APInt(Ty->getScalarSizeInBits(), - Ty->getScalarSizeInBits() - - X->getType()->getScalarSizeInBits())))) { - auto *SExt = Builder.CreateSExt(X, Ty, X->getName() + ".signext"); - auto *SanitizedSignMask = cast<Constant>(Op1); - // We must be careful with the undef elements of the sign bit mask, however: - // the mask elt can be undef iff the shift amount for that lane was undef, - // otherwise we need to sanitize undef masks to zero. - SanitizedSignMask = Constant::replaceUndefsWith( - SanitizedSignMask, ConstantInt::getNullValue(Ty->getScalarType())); - SanitizedSignMask = - Constant::mergeUndefsWith(SanitizedSignMask, cast<Constant>(Y)); - return BinaryOperator::CreateAnd(SExt, SanitizedSignMask); + } + + if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))), + m_SignMask())) && + match(Y, m_SpecificInt_ICMP( + ICmpInst::Predicate::ICMP_EQ, + APInt(Ty->getScalarSizeInBits(), + Ty->getScalarSizeInBits() - + X->getType()->getScalarSizeInBits())))) { + auto *SExt = Builder.CreateSExt(X, Ty, X->getName() + ".signext"); + auto *SanitizedSignMask = cast<Constant>(Op1); + // We must be careful with the undef elements of the sign bit mask, however: + // the mask elt can be undef iff the shift amount for that lane was undef, + // otherwise we need to sanitize undef masks to zero. 
+ SanitizedSignMask = Constant::replaceUndefsWith( + SanitizedSignMask, ConstantInt::getNullValue(Ty->getScalarType())); + SanitizedSignMask = + Constant::mergeUndefsWith(SanitizedSignMask, cast<Constant>(Y)); + return BinaryOperator::CreateAnd(SExt, SanitizedSignMask); } if (Instruction *Z = narrowMaskedBinOp(I)) @@ -1891,13 +1891,13 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B))))) return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B)); - // A & ~(A ^ B) --> A & B - if (match(Op1, m_Not(m_c_Xor(m_Specific(Op0), m_Value(B))))) - return BinaryOperator::CreateAnd(Op0, B); - // ~(A ^ B) & A --> A & B - if (match(Op0, m_Not(m_c_Xor(m_Specific(Op1), m_Value(B))))) - return BinaryOperator::CreateAnd(Op1, B); - + // A & ~(A ^ B) --> A & B + if (match(Op1, m_Not(m_c_Xor(m_Specific(Op0), m_Value(B))))) + return BinaryOperator::CreateAnd(Op0, B); + // ~(A ^ B) & A --> A & B + if (match(Op0, m_Not(m_c_Xor(m_Specific(Op1), m_Value(B))))) + return BinaryOperator::CreateAnd(Op1, B); + // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) @@ -1969,30 +1969,30 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { Value *A; if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && A->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(A, Op1, Constant::getNullValue(Ty)); + return SelectInst::Create(A, Op1, Constant::getNullValue(Ty)); if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) && A->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(A, Op0, Constant::getNullValue(Ty)); + return SelectInst::Create(A, Op0, Constant::getNullValue(Ty)); // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0. - if (match(&I, m_c_And(m_OneUse(m_AShr( - m_NSWSub(m_Value(Y), m_Value(X)), - m_SpecificInt(Ty->getScalarSizeInBits() - 1))), - m_Deferred(X)))) { - Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); - return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty)); - } - - // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions - if (sinkNotIntoOtherHandOfAndOrOr(I)) - return &I; - + if (match(&I, m_c_And(m_OneUse(m_AShr( + m_NSWSub(m_Value(Y), m_Value(X)), + m_SpecificInt(Ty->getScalarSizeInBits() - 1))), + m_Deferred(X)))) { + Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); + return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty)); + } + + // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions + if (sinkNotIntoOtherHandOfAndOrOr(I)) + return &I; + return nullptr; } -Instruction *InstCombinerImpl::matchBSwapOrBitReverse(BinaryOperator &Or, - bool MatchBSwaps, - bool MatchBitReversals) { +Instruction *InstCombinerImpl::matchBSwapOrBitReverse(BinaryOperator &Or, + bool MatchBSwaps, + bool MatchBitReversals) { assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'"); Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1); @@ -2004,32 +2004,32 @@ Instruction *InstCombinerImpl::matchBSwapOrBitReverse(BinaryOperator &Or, Op1 = Ext->getOperand(0); // (A | B) | C and A | (B | C) -> bswap if possible. - bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())); + bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); - // (A >> B) | C and (A << B) | C -> bswap if possible. 
- bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || - match(Op1, m_LogicalShift(m_Value(), m_Value())); + // (A >> B) | C and (A << B) | C -> bswap if possible. + bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || + match(Op1, m_LogicalShift(m_Value(), m_Value())); - // (A & B) | C and A | (B & C) -> bswap if possible. - bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || - match(Op1, m_And(m_Value(), m_Value())); + // (A & B) | C and A | (B & C) -> bswap if possible. + bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || + match(Op1, m_And(m_Value(), m_Value())); - // fshl(A,B,C) | D and A | fshl(B,C,D) -> bswap if possible. - // fshr(A,B,C) | D and A | fshr(B,C,D) -> bswap if possible. - bool OrWithFunnels = match(Op0, m_FShl(m_Value(), m_Value(), m_Value())) || - match(Op0, m_FShr(m_Value(), m_Value(), m_Value())) || - match(Op0, m_FShl(m_Value(), m_Value(), m_Value())) || - match(Op0, m_FShr(m_Value(), m_Value(), m_Value())); + // fshl(A,B,C) | D and A | fshl(B,C,D) -> bswap if possible. + // fshr(A,B,C) | D and A | fshr(B,C,D) -> bswap if possible. + bool OrWithFunnels = match(Op0, m_FShl(m_Value(), m_Value(), m_Value())) || + match(Op0, m_FShr(m_Value(), m_Value(), m_Value())) || + match(Op0, m_FShl(m_Value(), m_Value(), m_Value())) || + match(Op0, m_FShr(m_Value(), m_Value(), m_Value())); - // TODO: Do we need all these filtering checks or should we just rely on - // recognizeBSwapOrBitReverseIdiom + collectBitParts to reject them quickly? - if (!OrWithOrs && !OrWithShifts && !OrWithAnds && !OrWithFunnels) + // TODO: Do we need all these filtering checks or should we just rely on + // recognizeBSwapOrBitReverseIdiom + collectBitParts to reject them quickly? + if (!OrWithOrs && !OrWithShifts && !OrWithAnds && !OrWithFunnels) return nullptr; - SmallVector<Instruction *, 4> Insts; - if (!recognizeBSwapOrBitReverseIdiom(&Or, MatchBSwaps, MatchBitReversals, - Insts)) + SmallVector<Instruction *, 4> Insts; + if (!recognizeBSwapOrBitReverseIdiom(&Or, MatchBSwaps, MatchBitReversals, + Insts)) return nullptr; Instruction *LastInst = Insts.pop_back_val(); LastInst->removeFromParent(); @@ -2039,72 +2039,72 @@ Instruction *InstCombinerImpl::matchBSwapOrBitReverse(BinaryOperator &Or, return LastInst; } -/// Match UB-safe variants of the funnel shift intrinsic. -static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { +/// Match UB-safe variants of the funnel shift intrinsic. +static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { // TODO: Can we reduce the code duplication between this and the related // rotate matching code under visitSelect and visitTrunc? unsigned Width = Or.getType()->getScalarSizeInBits(); - // First, find an or'd pair of opposite shifts: - // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) + // First, find an or'd pair of opposite shifts: + // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) BinaryOperator *Or0, *Or1; if (!match(Or.getOperand(0), m_BinOp(Or0)) || !match(Or.getOperand(1), m_BinOp(Or1))) return nullptr; - Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; - if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || - !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) || - Or0->getOpcode() == Or1->getOpcode()) - return nullptr; - - // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). 
- if (Or0->getOpcode() == BinaryOperator::LShr) { - std::swap(Or0, Or1); - std::swap(ShVal0, ShVal1); - std::swap(ShAmt0, ShAmt1); - } - assert(Or0->getOpcode() == BinaryOperator::Shl && - Or1->getOpcode() == BinaryOperator::LShr && - "Illegal or(shift,shift) pair"); - - // Match the shift amount operands for a funnel shift pattern. This always - // matches a subtraction on the R operand. - auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { - // Check for constant shift amounts that sum to the bitwidth. - const APInt *LI, *RI; - if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI))) - if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width) - return ConstantInt::get(L->getType(), *LI); - - Constant *LC, *RC; - if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) && - match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && - match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && - match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width))) - return ConstantExpr::mergeUndefsWith(LC, RC); - - // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width. - // We limit this to X < Width in case the backend re-expands the intrinsic, - // and has to reintroduce a shift modulo operation (InstCombine might remove - // it after this fold). This still doesn't guarantee that the final codegen - // will match this original pattern. - if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) { - KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or); - return KnownL.getMaxValue().ult(Width) ? L : nullptr; - } - - // For non-constant cases, the following patterns currently only work for - // rotation patterns. - // TODO: Add general funnel-shift compatible patterns. - if (ShVal0 != ShVal1) - return nullptr; - - // For non-constant cases we don't support non-pow2 shift masks. - // TODO: Is it worth matching urem as well? - if (!isPowerOf2_32(Width)) - return nullptr; - + Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; + if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || + !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) || + Or0->getOpcode() == Or1->getOpcode()) + return nullptr; + + // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). + if (Or0->getOpcode() == BinaryOperator::LShr) { + std::swap(Or0, Or1); + std::swap(ShVal0, ShVal1); + std::swap(ShAmt0, ShAmt1); + } + assert(Or0->getOpcode() == BinaryOperator::Shl && + Or1->getOpcode() == BinaryOperator::LShr && + "Illegal or(shift,shift) pair"); + + // Match the shift amount operands for a funnel shift pattern. This always + // matches a subtraction on the R operand. + auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { + // Check for constant shift amounts that sum to the bitwidth. + const APInt *LI, *RI; + if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI))) + if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width) + return ConstantInt::get(L->getType(), *LI); + + Constant *LC, *RC; + if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) && + match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && + match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && + match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width))) + return ConstantExpr::mergeUndefsWith(LC, RC); + + // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width. 
+ // We limit this to X < Width in case the backend re-expands the intrinsic, + // and has to reintroduce a shift modulo operation (InstCombine might remove + // it after this fold). This still doesn't guarantee that the final codegen + // will match this original pattern. + if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) { + KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or); + return KnownL.getMaxValue().ult(Width) ? L : nullptr; + } + + // For non-constant cases, the following patterns currently only work for + // rotation patterns. + // TODO: Add general funnel-shift compatible patterns. + if (ShVal0 != ShVal1) + return nullptr; + + // For non-constant cases we don't support non-pow2 shift masks. + // TODO: Is it worth matching urem as well? + if (!isPowerOf2_32(Width)) + return nullptr; + // The shift amount may be masked with negation: // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1))) Value *X; @@ -2120,25 +2120,25 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { m_SpecificInt(Mask)))) return L; - if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) && - match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))) - return L; - + if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) && + match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))) + return L; + return nullptr; }; Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width); - bool IsFshl = true; // Sub on LSHR. + bool IsFshl = true; // Sub on LSHR. if (!ShAmt) { ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width); - IsFshl = false; // Sub on SHL. + IsFshl = false; // Sub on SHL. } if (!ShAmt) return nullptr; Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType()); - return IntrinsicInst::Create(F, {ShVal0, ShVal1, ShAmt}); + return IntrinsicInst::Create(F, {ShVal0, ShVal1, ShAmt}); } /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. @@ -2196,7 +2196,7 @@ static Instruction *matchOrConcat(Instruction &Or, /// If all elements of two constant vectors are 0/-1 and inverses, return true. static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { - unsigned NumElts = cast<FixedVectorType>(C1->getType())->getNumElements(); + unsigned NumElts = cast<FixedVectorType>(C1->getType())->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *EltC1 = C1->getAggregateElement(i); Constant *EltC2 = C2->getAggregateElement(i); @@ -2214,7 +2214,7 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { /// We have an expression of the form (A & C) | (B & D). If A is a scalar or /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of /// B, it can be used as the condition operand of a select instruction. -Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) { +Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) { // Step 1: We may have peeked through bitcasts in the caller. // Exit immediately if we don't have (vector) integer types. Type *Ty = A->getType(); @@ -2271,8 +2271,8 @@ Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) { /// We have an expression of the form (A & C) | (B & D). Try to simplify this /// to "A' ? C : D", where A' is a boolean or vector of booleans. 
-Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B, - Value *D) { +Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B, + Value *D) { // The potential condition of the select may be bitcasted. In that case, look // through its bitcast and the corresponding bitcast of the 'not' condition. Type *OrigType = A->getType(); @@ -2292,8 +2292,8 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B, } /// Fold (icmp)|(icmp) if possible. -Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, - BinaryOperator &Or) { +Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, + BinaryOperator &Or) { const SimplifyQuery Q = SQ.getWithInstruction(&Or); // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) @@ -2302,10 +2302,10 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, return V; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); - Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1); - auto *LHSC = dyn_cast<ConstantInt>(LHS1); - auto *RHSC = dyn_cast<ConstantInt>(RHS1); + Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); + Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1); + auto *LHSC = dyn_cast<ConstantInt>(LHS1); + auto *RHSC = dyn_cast<ConstantInt>(RHS1); // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) @@ -2322,15 +2322,15 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, LHSC->getType() == RHSC->getType() && LHSC->getValue() == (RHSC->getValue())) { - Value *AddOpnd; + Value *AddOpnd; ConstantInt *LAddC, *RAddC; - if (match(LHS0, m_Add(m_Value(AddOpnd), m_ConstantInt(LAddC))) && - match(RHS0, m_Add(m_Specific(AddOpnd), m_ConstantInt(RAddC))) && + if (match(LHS0, m_Add(m_Value(AddOpnd), m_ConstantInt(LAddC))) && + match(RHS0, m_Add(m_Specific(AddOpnd), m_ConstantInt(RAddC))) && LAddC->getValue().ugt(LHSC->getValue()) && RAddC->getValue().ugt(LHSC->getValue())) { APInt DiffC = LAddC->getValue() ^ RAddC->getValue(); - if (DiffC.isPowerOf2()) { + if (DiffC.isPowerOf2()) { ConstantInt *MaxAddC = nullptr; if (LAddC->getValue().ult(RAddC->getValue())) MaxAddC = RAddC; @@ -2350,7 +2350,7 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, RangeDiff.ugt(LHSC->getValue())) { Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); - Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC); + Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC); Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } @@ -2360,12 +2360,12 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (predicatesFoldable(PredL, PredR)) { - if (LHS0 == RHS1 && LHS1 == RHS0) + if (LHS0 == RHS1 && LHS1 == RHS0) LHS->swapOperands(); - if (LHS0 == RHS0 && LHS1 == RHS1) { + if (LHS0 == RHS0 && LHS1 == RHS1) { unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); bool IsSigned = LHS->isSigned() || RHS->isSigned(); - return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder); + return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder); } } @@ -2378,26 +2378,26 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) 
Value *A = nullptr, *B = nullptr; - if (PredL == ICmpInst::ICMP_EQ && match(LHS1, m_Zero())) { + if (PredL == ICmpInst::ICMP_EQ && match(LHS1, m_Zero())) { B = LHS0; - if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS1) + if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS1) A = RHS0; else if (PredR == ICmpInst::ICMP_UGT && LHS0 == RHS0) - A = RHS1; + A = RHS1; } // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1) // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1) - else if (PredR == ICmpInst::ICMP_EQ && match(RHS1, m_Zero())) { + else if (PredR == ICmpInst::ICMP_EQ && match(RHS1, m_Zero())) { B = RHS0; - if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS1) + if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS1) A = LHS0; - else if (PredL == ICmpInst::ICMP_UGT && RHS0 == LHS0) - A = LHS1; + else if (PredL == ICmpInst::ICMP_UGT && RHS0 == LHS0) + A = LHS1; } - if (A && B && B->getType()->isIntOrIntVectorTy()) + if (A && B && B->getType()->isIntOrIntVectorTy()) return Builder.CreateICmp( ICmpInst::ICMP_UGE, - Builder.CreateAdd(B, Constant::getAllOnesValue(B->getType())), A); + Builder.CreateAdd(B, Constant::getAllOnesValue(B->getType())), A); } if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, Or, Builder, Q)) @@ -2426,17 +2426,17 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder)) return X; - // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) - // TODO: Remove this when foldLogOpOfMaskedICmps can handle vectors. - if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_Zero()) && - PredR == ICmpInst::ICMP_NE && match(RHS1, m_Zero()) && - LHS0->getType()->isIntOrIntVectorTy() && - LHS0->getType() == RHS0->getType()) { - Value *NewOr = Builder.CreateOr(LHS0, RHS0); - return Builder.CreateICmp(PredL, NewOr, - Constant::getNullValue(NewOr->getType())); - } - + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + // TODO: Remove this when foldLogOpOfMaskedICmps can handle vectors. + if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_Zero()) && + PredR == ICmpInst::ICMP_NE && match(RHS1, m_Zero()) && + LHS0->getType()->isIntOrIntVectorTy() && + LHS0->getType() == RHS0->getType()) { + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, + Constant::getNullValue(NewOr->getType())); + } + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). if (!LHSC || !RHSC) return nullptr; @@ -2554,7 +2554,7 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. 
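The fold above rewrites (icmp eq B, 0) | (icmp ult A, B) as an unsigned comparison against B-1, emitted as icmp uge (add B, -1), A. A standalone check of that identity on sample values, separate from the LLVM code; names and samples are illustrative, and unsigned wraparound of B-1 when B == 0 is the point being exercised.

#include <cassert>
#include <cstdint>

static bool orOfICmps(uint32_t A, uint32_t B) { return B == 0 || A < B; }
static bool foldedForm(uint32_t A, uint32_t B) { return B - 1u >= A; } // add B, -1; icmp uge

int main() {
  const uint32_t Samples[] = {0u, 1u, 2u, 7u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t A : Samples)
    for (uint32_t B : Samples)
      assert(orOfICmps(A, B) == foldedForm(A, B));
  return 0;
}

When B is zero, B-1 wraps to the maximum unsigned value, so the uge form is trivially true, matching the "B == 0" disjunct it absorbs.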
-Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -2584,12 +2584,12 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) return FoldedLogic; - if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true, - /*MatchBitReversals*/ false)) + if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true, + /*MatchBitReversals*/ false)) return BSwap; - if (Instruction *Funnel = matchFunnelShift(I, *this)) - return Funnel; + if (Instruction *Funnel = matchFunnelShift(I, *this)) + return Funnel; if (Instruction *Concat = matchOrConcat(I, Builder)) return replaceInstUsesWith(I, Concat); @@ -2609,9 +2609,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { Value *A, *B, *C, *D; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - // (A & C1)|(B & C2) - ConstantInt *C1, *C2; - if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) { + // (A & C1)|(B & C2) + ConstantInt *C1, *C2; + if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) { Value *V1 = nullptr, *V2 = nullptr; if ((C1->getValue() & C2->getValue()).isNullValue()) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) @@ -2802,7 +2802,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { // ORs in the hopes that we'll be able to simplify it this way. // (X|C) | V --> (X|V) | C ConstantInt *CI; - if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) && + if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) && match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) { Value *Inner = Builder.CreateOr(A, Op1); Inner->takeName(Op0); @@ -2823,17 +2823,17 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } - // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X. + // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X. 
{ Value *X, *Y; Type *Ty = I.getType(); - if (match(&I, m_c_Or(m_OneUse(m_AShr( - m_NSWSub(m_Value(Y), m_Value(X)), - m_SpecificInt(Ty->getScalarSizeInBits() - 1))), - m_Deferred(X)))) { + if (match(&I, m_c_Or(m_OneUse(m_AShr( + m_NSWSub(m_Value(Y), m_Value(X)), + m_SpecificInt(Ty->getScalarSizeInBits() - 1))), + m_Deferred(X)))) { Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); - Value *AllOnes = ConstantInt::getAllOnesValue(Ty); - return SelectInst::Create(NewICmpInst, AllOnes, X); + Value *AllOnes = ConstantInt::getAllOnesValue(Ty); + return SelectInst::Create(NewICmpInst, AllOnes, X); } } @@ -2867,10 +2867,10 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } - // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions - if (sinkNotIntoOtherHandOfAndOrOr(I)) - return &I; - + // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions + if (sinkNotIntoOtherHandOfAndOrOr(I)) + return &I; + return nullptr; } @@ -2927,8 +2927,8 @@ static Instruction *foldXorToXor(BinaryOperator &I, return nullptr; } -Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, - BinaryOperator &I) { +Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, + BinaryOperator &I) { assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS && I.getOperand(1) == RHS && "Should be 'xor' with these operands"); @@ -3086,9 +3086,9 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I, return nullptr; // We only want to do the transform if it is free to do. - if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) { + if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) { // Ok, good. - } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) { + } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) { std::swap(X, Y); } else return nullptr; @@ -3097,52 +3097,52 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I, return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan"); } -// Transform -// z = (~x) &/| y -// into: -// z = ~(x |/& (~y)) -// iff y is free to invert and all uses of z can be freely updated. -bool InstCombinerImpl::sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I) { - Instruction::BinaryOps NewOpc; - switch (I.getOpcode()) { - case Instruction::And: - NewOpc = Instruction::Or; - break; - case Instruction::Or: - NewOpc = Instruction::And; - break; - default: - return false; - }; - - Value *X, *Y; - if (!match(&I, m_c_BinOp(m_Not(m_Value(X)), m_Value(Y)))) - return false; - - // Will we be able to fold the `not` into Y eventually? - if (!InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) - return false; - - // And can our users be adapted? - if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr)) - return false; - - Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); - Value *NewBinOp = - BinaryOperator::Create(NewOpc, X, NotY, I.getName() + ".not"); - Builder.Insert(NewBinOp); - replaceInstUsesWith(I, NewBinOp); - // We can not just create an outer `not`, it will most likely be immediately - // folded back, reconstructing our initial pattern, and causing an - // infinite combine loop, so immediately manually fold it away. - freelyInvertAllUsersOf(NewBinOp); - return true; -} - +// Transform +// z = (~x) &/| y +// into: +// z = ~(x |/& (~y)) +// iff y is free to invert and all uses of z can be freely updated. 
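The or(ashr(sub nsw Y, X, bitwidth-1), X) fold above works because an arithmetic shift right by bitwidth-1 splats the sign bit of Y - X. A standalone sketch of the resulting equivalence, separate from the LLVM code; the sign splat is written out directly rather than as a shift, the nsw requirement is modelled by choosing samples whose difference cannot overflow, and all names are illustrative.

#include <cassert>
#include <cstdint>

// ashr by (bitwidth - 1) yields all-ones for a negative value, zero otherwise.
static int32_t signSplat(int32_t V) { return V < 0 ? -1 : 0; }

// or(ashr(Y - X, 31), X)  ==  X > Y ? -1 : X, when Y - X does not overflow.
static int32_t patternForm(int32_t X, int32_t Y) { return signSplat(Y - X) | X; }
static int32_t selectForm(int32_t X, int32_t Y) { return X > Y ? -1 : X; }

int main() {
  const int32_t Samples[] = {-7, -1, 0, 1, 5, 1000};
  for (int32_t X : Samples)
    for (int32_t Y : Samples)
      assert(patternForm(X, Y) == selectForm(X, Y));
  return 0;
}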
+bool InstCombinerImpl::sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I) { + Instruction::BinaryOps NewOpc; + switch (I.getOpcode()) { + case Instruction::And: + NewOpc = Instruction::Or; + break; + case Instruction::Or: + NewOpc = Instruction::And; + break; + default: + return false; + }; + + Value *X, *Y; + if (!match(&I, m_c_BinOp(m_Not(m_Value(X)), m_Value(Y)))) + return false; + + // Will we be able to fold the `not` into Y eventually? + if (!InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) + return false; + + // And can our users be adapted? + if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr)) + return false; + + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); + Value *NewBinOp = + BinaryOperator::Create(NewOpc, X, NotY, I.getName() + ".not"); + Builder.Insert(NewBinOp); + replaceInstUsesWith(I, NewBinOp); + // We can not just create an outer `not`, it will most likely be immediately + // folded back, reconstructing our initial pattern, and causing an + // infinite combine loop, so immediately manually fold it away. + freelyInvertAllUsersOf(NewBinOp); + return true; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. -Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -3169,7 +3169,7 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - Type *Ty = I.getType(); + Type *Ty = I.getType(); // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M) // This it a special case in haveNoCommonBitsSet, but the computeKnownBits @@ -3236,7 +3236,7 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { match(C, m_Negative())) { // We matched a negative constant, so propagating undef is unsafe. // Clamp undef elements to -1. - Type *EltTy = Ty->getScalarType(); + Type *EltTy = Ty->getScalarType(); C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy)); return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y); } @@ -3246,25 +3246,25 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { match(C, m_NonNegative())) { // We matched a non-negative constant, so propagating undef is unsafe. // Clamp undef elements to 0. - Type *EltTy = Ty->getScalarType(); + Type *EltTy = Ty->getScalarType(); C = Constant::replaceUndefsWith(C, ConstantInt::getNullValue(EltTy)); return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y); } - // ~(X + C) --> ~C - X - if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C)))) - return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X); - - // ~(X - Y) --> ~X + Y - // FIXME: is it really beneficial to sink the `not` here? 
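The sinkNotIntoOtherHandOfAndOrOr transform above is De Morgan applied in reverse: (~x) & y becomes ~(x | ~y) and (~x) | y becomes ~(x & ~y). A standalone check on 32-bit scalars, separate from the LLVM code; names and sample values are illustrative only.

#include <cassert>
#include <cstdint>

static void checkSinkNot(uint32_t X, uint32_t Y) {
  assert((~X & Y) == ~(X | ~Y)); // z = (~x) & y  ==>  z = ~(x | (~y))
  assert((~X | Y) == ~(X & ~Y)); // z = (~x) | y  ==>  z = ~(x & (~y))
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0xFFu, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Samples)
    for (uint32_t Y : Samples)
      checkSinkNot(X, Y);
  return 0;
}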
- if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) - if (isa<Constant>(X) || NotVal->hasOneUse()) - return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); - - // ~(~X + Y) --> X - Y - if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y)))) - return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub, X, Y, - NotVal); + // ~(X + C) --> ~C - X + if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C)))) + return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X); + + // ~(X - Y) --> ~X + Y + // FIXME: is it really beneficial to sink the `not` here? + if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) + if (isa<Constant>(X) || NotVal->hasOneUse()) + return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); + + // ~(~X + Y) --> X - Y + if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y)))) + return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub, X, Y, + NotVal); } // Use DeMorgan and reassociation to eliminate a 'not' op. @@ -3295,56 +3295,56 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (match(Op1, m_APInt(RHSC))) { Value *X; const APInt *C; - // (C - X) ^ signmaskC --> (C + signmaskC) - X - if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X)))) - return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X); - - // (X + C) ^ signmaskC --> X + (C + signmaskC) - if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C)))) - return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC)); - - // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0 + // (C - X) ^ signmaskC --> (C + signmaskC) - X + if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X)))) + return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X); + + // (X + C) ^ signmaskC --> X + (C + signmaskC) + if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C)))) + return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC)); + + // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0 if (match(Op0, m_Or(m_Value(X), m_APInt(C))) && - MaskedValueIsZero(X, *C, 0, &I)) - return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC)); - - // If RHSC is inverting the remaining bits of shifted X, - // canonicalize to a 'not' before the shift to help SCEV and codegen: - // (X << C) ^ RHSC --> ~X << C - if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) && - *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).shl(*C)) { - Value *NotX = Builder.CreateNot(X); - return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C)); + MaskedValueIsZero(X, *C, 0, &I)) + return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC)); + + // If RHSC is inverting the remaining bits of shifted X, + // canonicalize to a 'not' before the shift to help SCEV and codegen: + // (X << C) ^ RHSC --> ~X << C + if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) && + *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).shl(*C)) { + Value *NotX = Builder.CreateNot(X); + return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C)); } - // (X >>u C) ^ RHSC --> ~X >>u C - if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) && - *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).lshr(*C)) { - Value *NotX = Builder.CreateNot(X); - return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C)); - } - // TODO: We could handle 'ashr' here as well. That would be matching - // a 'not' op and moving it before the shift. Doing that requires - // preventing the inverse fold in canShiftBinOpWithConstantRHS(). 
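The xor-with-constant folds in this hunk all reduce to modular-arithmetic and bit identities: ~(X + C) equals ~C - X, xor with the sign mask is the same as adding it, an or with disjoint bits is an xor, and xor-ing a shifted value with an all-ones-shifted constant is a shifted not. A standalone C++ check of those identities, separate from the LLVM code; unsigned wraparound is assumed and the sample constants are illustrative.

#include <cassert>
#include <cstdint>

static const uint32_t SignMask = 0x80000000u;

static void checkXorFolds(uint32_t X, uint32_t C) {
  // ~(X + C) --> ~C - X
  assert(~(X + C) == ~C - X);
  // (X + C) ^ signmask --> X + (C + signmask); (C - X) ^ signmask --> (C + signmask) - X
  assert(((X + C) ^ SignMask) == X + (C + SignMask));
  assert(((C - X) ^ SignMask) == (C + SignMask) - X);
  // (X | C) ^ R --> X ^ (C ^ R) when X and C share no set bits
  const uint32_t XDisjoint = X & ~C, R = 0xA5A5A5A5u;
  assert(((XDisjoint | C) ^ R) == (XDisjoint ^ (C ^ R)));
  // (X << 5) ^ (AllOnes << 5) --> (~X) << 5
  assert(((X << 5) ^ (0xFFFFFFFFu << 5)) == (~X << 5));
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x7Fu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Samples)
    for (uint32_t C : Samples)
      checkXorFolds(X, C);
  return 0;
}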
+ // (X >>u C) ^ RHSC --> ~X >>u C + if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) && + *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).lshr(*C)) { + Value *NotX = Builder.CreateNot(X); + return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C)); + } + // TODO: We could handle 'ashr' here as well. That would be matching + // a 'not' op and moving it before the shift. Doing that requires + // preventing the inverse fold in canShiftBinOpWithConstantRHS(). } } - // FIXME: This should not be limited to scalar (pull into APInt match above). - { - Value *X; - ConstantInt *C1, *C2, *C3; - // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) - if (match(Op1, m_ConstantInt(C3)) && - match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)), - m_ConstantInt(C2))) && - Op0->hasOneUse()) { - // fold (C1 >> C2) ^ C3 - APInt FoldConst = C1->getValue().lshr(C2->getValue()); - FoldConst ^= C3->getValue(); - // Prepare the two operands. - auto *Opnd0 = cast<Instruction>(Builder.CreateLShr(X, C2)); - Opnd0->takeName(cast<Instruction>(Op0)); - Opnd0->setDebugLoc(I.getDebugLoc()); - return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst)); + // FIXME: This should not be limited to scalar (pull into APInt match above). + { + Value *X; + ConstantInt *C1, *C2, *C3; + // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) + if (match(Op1, m_ConstantInt(C3)) && + match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)), + m_ConstantInt(C2))) && + Op0->hasOneUse()) { + // fold (C1 >> C2) ^ C3 + APInt FoldConst = C1->getValue().lshr(C2->getValue()); + FoldConst ^= C3->getValue(); + // Prepare the two operands. + auto *Opnd0 = cast<Instruction>(Builder.CreateLShr(X, C2)); + Opnd0->takeName(cast<Instruction>(Op0)); + Opnd0->setDebugLoc(I.getDebugLoc()); + return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst)); } } @@ -3401,25 +3401,25 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { match(Op1, m_Not(m_Specific(A)))) return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); - // (~A & B) ^ A --> A | B -- There are 4 commuted variants. - if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A)))) - return BinaryOperator::CreateOr(A, B); - - // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. - // TODO: Loosen one-use restriction if common operand is a constant. - Value *D; - if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) && - match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) { - if (B == C || B == D) - std::swap(A, B); - if (A == C) - std::swap(C, D); - if (A == D) { - Value *NotA = Builder.CreateNot(A); - return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA); - } - } - + // (~A & B) ^ A --> A | B -- There are 4 commuted variants. + if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A)))) + return BinaryOperator::CreateOr(A, B); + + // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. + // TODO: Loosen one-use restriction if common operand is a constant. 
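The ((X ^ C1) >> C2) ^ C3 fold above is valid because a logical right shift distributes over xor, so the constant part can be folded away at compile time. A standalone check, separate from the LLVM code; the shift amount and constants are illustrative samples.

#include <cassert>
#include <cstdint>

// ((X ^ C1) >> C2) ^ C3  -->  (X >> C2) ^ ((C1 >> C2) ^ C3)
static void checkLShrXor(uint32_t X, uint32_t C1, uint32_t C2, uint32_t C3) {
  assert((((X ^ C1) >> C2) ^ C3) == ((X >> C2) ^ ((C1 >> C2) ^ C3)));
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x0F0Fu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Samples)
    for (uint32_t C1 : Samples)
      checkLShrXor(X, C1, 13u, 0xA5A5A5A5u);
  return 0;
}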
+ Value *D; + if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) { + if (B == C || B == D) + std::swap(A, B); + if (A == C) + std::swap(C, D); + if (A == D) { + Value *NotA = Builder.CreateNot(A); + return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA); + } + } + if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (Value *V = foldXorOfICmps(LHS, RHS, I)) @@ -3495,30 +3495,30 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { } } - // Pull 'not' into operands of select if both operands are one-use compares - // or one is one-use compare and the other one is a constant. + // Pull 'not' into operands of select if both operands are one-use compares + // or one is one-use compare and the other one is a constant. // Inverting the predicates eliminates the 'not' operation. // Example: - // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) --> + // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) --> // select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?) - // not (select ?, (cmp TPred, ?, ?), true --> - // select ?, (cmp InvTPred, ?, ?), false + // not (select ?, (cmp TPred, ?, ?), true --> + // select ?, (cmp InvTPred, ?, ?), false if (auto *Sel = dyn_cast<SelectInst>(Op0)) { - Value *TV = Sel->getTrueValue(); - Value *FV = Sel->getFalseValue(); - auto *CmpT = dyn_cast<CmpInst>(TV); - auto *CmpF = dyn_cast<CmpInst>(FV); - bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV); - bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV); - if (InvertibleT && InvertibleF) { - if (CmpT) - CmpT->setPredicate(CmpT->getInversePredicate()); - else - Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV))); - if (CmpF) - CmpF->setPredicate(CmpF->getInversePredicate()); - else - Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV))); + Value *TV = Sel->getTrueValue(); + Value *FV = Sel->getFalseValue(); + auto *CmpT = dyn_cast<CmpInst>(TV); + auto *CmpF = dyn_cast<CmpInst>(FV); + bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV); + bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV); + if (InvertibleT && InvertibleF) { + if (CmpT) + CmpT->setPredicate(CmpT->getInversePredicate()); + else + Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV))); + if (CmpF) + CmpF->setPredicate(CmpF->getInversePredicate()); + else + Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV))); return replaceInstUsesWith(I, Sel); } } @@ -3527,15 +3527,15 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (Instruction *NewXor = sinkNotIntoXor(I, Builder)) return NewXor; - // Otherwise, if all else failed, try to hoist the xor-by-constant: - // (X ^ C) ^ Y --> (X ^ Y) ^ C - // Just like we do in other places, we completely avoid the fold - // for constantexprs, at least to avoid endless combine loop. - if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X), - m_Unless(m_ConstantExpr())), - m_ImmConstant(C1))), - m_Value(Y)))) - return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1); - + // Otherwise, if all else failed, try to hoist the xor-by-constant: + // (X ^ C) ^ Y --> (X ^ Y) ^ C + // Just like we do in other places, we completely avoid the fold + // for constantexprs, at least to avoid endless combine loop. 
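The two xor rewrites above, (~A & B) ^ A --> A | B and (A | B) ^ (A | C) --> (B ^ C) & ~A, hold bit by bit, so they can be checked directly on scalars. A standalone sketch, separate from the LLVM code; names and samples are illustrative only.

#include <cassert>
#include <cstdint>

static void checkXorOfOrs(uint32_t A, uint32_t B, uint32_t C) {
  // (~A & B) ^ A --> A | B
  assert(((~A & B) ^ A) == (A | B));
  // (A | B) ^ (A | C) --> (B ^ C) & ~A
  assert(((A | B) ^ (A | C)) == ((B ^ C) & ~A));
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x00FF00FFu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t A : Samples)
    for (uint32_t B : Samples)
      for (uint32_t C : Samples)
        checkXorOfOrs(A, B, C);
  return 0;
}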
+ if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X), + m_Unless(m_ConstantExpr())), + m_ImmConstant(C1))), + m_Value(Y)))) + return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1); + return nullptr; } diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp index 495493aab4..494cb04049 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp @@ -9,10 +9,10 @@ // This file implements the visit functions for atomic rmw instructions. // //===----------------------------------------------------------------------===// - + #include "InstCombineInternal.h" #include "llvm/IR/Instructions.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" using namespace llvm; @@ -32,7 +32,7 @@ bool isIdempotentRMW(AtomicRMWInst& RMWI) { default: return false; }; - + auto C = dyn_cast<ConstantInt>(RMWI.getValOperand()); if(!C) return false; @@ -93,13 +93,13 @@ bool isSaturating(AtomicRMWInst& RMWI) { return C->isMaxValue(false); }; } -} // namespace +} // namespace -Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) { +Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) { // Volatile RMWs perform a load and a store, we cannot replace this by just a // load or just a store. We chose not to canonicalize out of general paranoia - // about user expectations around volatile. + // about user expectations around volatile. if (RMWI.isVolatile()) return nullptr; @@ -117,7 +117,7 @@ Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) { "AtomicRMWs don't make sense with Unordered or NotAtomic"); // Any atomicrmw xchg with no uses can be converted to a atomic store if the - // ordering is compatible. + // ordering is compatible. if (RMWI.getOperation() == AtomicRMWInst::Xchg && RMWI.use_empty()) { if (Ordering != AtomicOrdering::Release && @@ -129,14 +129,14 @@ Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) { SI->setAlignment(DL.getABITypeAlign(RMWI.getType())); return eraseInstFromFunction(RMWI); } - + if (!isIdempotentRMW(RMWI)) return nullptr; // We chose to canonicalize all idempotent operations to an single // operation code and constant. This makes it easier for the rest of the // optimizer to match easily. The choices of or w/0 and fadd w/-0.0 are - // arbitrary. + // arbitrary. 
if (RMWI.getType()->isIntegerTy() && RMWI.getOperation() != AtomicRMWInst::Or) { RMWI.setOperation(AtomicRMWInst::Or); @@ -151,7 +151,7 @@ Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) { if (Ordering != AtomicOrdering::Acquire && Ordering != AtomicOrdering::Monotonic) return nullptr; - + LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand(), "", false, DL.getABITypeAlign(RMWI.getType()), Ordering, RMWI.getSyncScopeID()); diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5482b944e3..a50dc72c72 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -28,7 +28,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" @@ -66,7 +66,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include <algorithm> @@ -99,7 +99,7 @@ static Type *getPromotedType(Type *Ty) { return Ty; } -Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { +Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT); MaybeAlign CopyDstAlign = MI->getDestAlign(); if (!CopyDstAlign || *CopyDstAlign < DstAlign) { @@ -214,7 +214,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { return MI; } -Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { +Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { const Align KnownAlignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); MaybeAlign MemSetAlign = MI->getDestAlign(); @@ -276,7 +276,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { // TODO, Obvious Missing Transforms: // * Narrow width by halfs excluding zero/undef lanes -Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { +Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { Value *LoadPtr = II.getArgOperand(0); const Align Alignment = cast<ConstantInt>(II.getArgOperand(1))->getAlignValue(); @@ -289,8 +289,8 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { // If we can unconditionally load from this address, replace with a // load/select idiom. 
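The idempotent atomicrmw canonicalization above picks "or with 0" for integers and "fadd with -0.0" for floats because those constants are true identities for every input, including signed zeros. A standalone sketch of why -0.0 (and not +0.0) is the right choice, separate from the LLVM code; IEEE-754 semantics with the default rounding mode are assumed.

#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  // Integer identity: x | 0 == x for every x.
  const uint32_t Ints[] = {0u, 1u, 0xFFFFFFFFu};
  for (uint32_t X : Ints)
    assert((X | 0u) == X);

  // Float identity: x + (-0.0) == x, preserving the sign of zero.
  const double PosZero = 0.0, NegZero = -0.0;
  assert(!std::signbit(PosZero + NegZero)); // +0.0 + (-0.0) stays +0.0
  assert(std::signbit(NegZero + NegZero));  // -0.0 + (-0.0) stays -0.0
  assert(1.5 + NegZero == 1.5);
  // +0.0 would not be an identity: it turns -0.0 into +0.0.
  assert(!std::signbit(NegZero + PosZero));
  return 0;
}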
TODO: use DT for context sensitive query - if (isDereferenceablePointer(LoadPtr, II.getType(), - II.getModule()->getDataLayout(), &II, nullptr)) { + if (isDereferenceablePointer(LoadPtr, II.getType(), + II.getModule()->getDataLayout(), &II, nullptr)) { Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, "unmaskedload"); return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); @@ -302,7 +302,7 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { // TODO, Obvious Missing Transforms: // * Single constant active lane -> store // * Narrow width by halfs excluding zero/undef lanes -Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { +Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3)); if (!ConstMask) return nullptr; @@ -318,14 +318,14 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment); } - if (isa<ScalableVectorType>(ConstMask->getType())) - return nullptr; - + if (isa<ScalableVectorType>(ConstMask->getType())) + return nullptr; + // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); APInt UndefElts(DemandedElts.getBitWidth(), 0); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) + if (Value *V = + SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) return replaceOperand(II, 0, V); return nullptr; @@ -338,7 +338,7 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { // * Narrow width by halfs excluding zero/undef lanes // * Vector splat address w/known mask -> scalar load // * Vector incrementing address -> vector masked load -Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { +Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { return nullptr; } @@ -348,7 +348,7 @@ Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { // * Narrow store width by halfs excluding zero/undef lanes // * Vector splat address w/known mask -> scalar store // * Vector incrementing address -> vector masked store -Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { +Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3)); if (!ConstMask) return nullptr; @@ -357,17 +357,17 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { if (ConstMask->isNullValue()) return eraseInstFromFunction(II); - if (isa<ScalableVectorType>(ConstMask->getType())) - return nullptr; - + if (isa<ScalableVectorType>(ConstMask->getType())) + return nullptr; + // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); APInt UndefElts(DemandedElts.getBitWidth(), 0); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) + if (Value *V = + SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) return replaceOperand(II, 0, V); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts)) + if (Value *V = + SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts)) return replaceOperand(II, 1, V); return nullptr; @@ -382,7 +382,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { /// 
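The masked-load rewrite above replaces the per-lane conditional load with one unconditional load followed by a select once every lane is known dereferenceable. A scalar model of that equivalence, separate from the LLVM code; plain arrays stand in for vectors and all names are illustrative.

#include <cassert>
#include <cstddef>

// Reference per-lane semantics of a masked load.
static void maskedLoadRef(const int *Ptr, const bool *Mask, const int *PassThru,
                          int *Out, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    Out[I] = Mask[I] ? Ptr[I] : PassThru[I];
}

// Load/select idiom: legal here because every lane is assumed dereferenceable.
static void loadThenSelect(const int *Ptr, const bool *Mask, const int *PassThru,
                           int *Out, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I) {
    const int Loaded = Ptr[I];
    Out[I] = Mask[I] ? Loaded : PassThru[I];
  }
}

int main() {
  const int Data[4] = {10, 20, 30, 40};
  const int Pass[4] = {-1, -2, -3, -4};
  const bool Mask[4] = {true, false, true, false};
  int A[4], B[4];
  maskedLoadRef(Data, Mask, Pass, A, 4);
  loadThenSelect(Data, Mask, Pass, B, 4);
  for (std::size_t I = 0; I != 4; ++I)
    assert(A[I] == B[I]);
  return 0;
}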
This is legal because it preserves the most recent information about /// the presence or absence of invariant.group. static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { auto *Arg = II.getArgOperand(0); auto *StrippedArg = Arg->stripPointerCasts(); auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups(); @@ -407,7 +407,7 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II, return cast<Instruction>(Result); } -static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { +static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { assert((II.getIntrinsicID() == Intrinsic::cttz || II.getIntrinsicID() == Intrinsic::ctlz) && "Expected cttz or ctlz intrinsic"); @@ -433,9 +433,9 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) return IC.replaceOperand(II, 0, X); - - if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) - return IC.replaceOperand(II, 0, X); + + if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) + return IC.replaceOperand(II, 0, X); } KnownBits Known = IC.computeKnownBits(Op0, 0, &II); @@ -480,7 +480,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { return nullptr; } -static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { +static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { assert(II.getIntrinsicID() == Intrinsic::ctpop && "Expected ctpop intrinsic"); Type *Ty = II.getType(); @@ -546,7 +546,7 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II, if (!C) return nullptr; - auto *VecTy = cast<FixedVectorType>(II.getType()); + auto *VecTy = cast<FixedVectorType>(II.getType()); unsigned NumElts = VecTy->getNumElements(); // Only perform this transformation for <8 x i8> vector types. @@ -594,9 +594,9 @@ static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed // call @llvm.foo.end(i1 0) // call @llvm.foo.end(i1 0) ; &I -static bool -removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, - std::function<bool(const IntrinsicInst &)> IsStart) { +static bool +removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, + std::function<bool(const IntrinsicInst &)> IsStart) { // We start from the end intrinsic and scan backwards, so that InstCombine // has already processed (and potentially removed) all the instructions // before the end intrinsic. 
@@ -622,7 +622,7 @@ removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, return false; } -Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { +Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) { return I.getIntrinsicID() == Intrinsic::vastart || I.getIntrinsicID() == Intrinsic::vacopy; @@ -630,7 +630,7 @@ Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { return nullptr; } -static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { +static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap"); Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1); if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) { @@ -641,44 +641,44 @@ static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { return nullptr; } -/// Creates a result tuple for an overflow intrinsic \p II with a given -/// \p Result and a constant \p Overflow value. -static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, - Constant *Overflow) { - Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; - StructType *ST = cast<StructType>(II->getType()); - Constant *Struct = ConstantStruct::get(ST, V); - return InsertValueInst::Create(Struct, Result, 0); -} - -Instruction * -InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { +/// Creates a result tuple for an overflow intrinsic \p II with a given +/// \p Result and a constant \p Overflow value. +static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, + Constant *Overflow) { + Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); + return InsertValueInst::Create(Struct, Result, 0); +} + +Instruction * +InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { WithOverflowInst *WO = cast<WithOverflowInst>(II); Value *OperationResult = nullptr; Constant *OverflowResult = nullptr; if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(), WO->getRHS(), *WO, OperationResult, OverflowResult)) - return createOverflowTuple(WO, OperationResult, OverflowResult); + return createOverflowTuple(WO, OperationResult, OverflowResult); return nullptr; } -static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI, - const DataLayout &DL, AssumptionCache *AC, - DominatorTree *DT) { - KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT); - if (Known.isNonNegative()) - return false; - if (Known.isNegative()) - return true; - - return isImpliedByDomCondition( - ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); -} - +static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI, + const DataLayout &DL, AssumptionCache *AC, + DominatorTree *DT) { + KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT); + if (Known.isNonNegative()) + return false; + if (Known.isNegative()) + return true; + + return isImpliedByDomCondition( + ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); +} + /// CallInst simplification. This mostly only handles folding of intrinsic /// instructions. For normal calls, it allows visitCallBase to do the heavy /// lifting. -Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { +Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Don't try to simplify calls without uses. 
It will not do anything useful, // but will result in the following folds being skipped. if (!CI.use_empty()) @@ -784,10 +784,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - if (II->isCommutative()) { - if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) - return NewCall; - } + if (II->isCommutative()) { + if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) + return NewCall; + } Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { @@ -795,73 +795,73 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) return replaceInstUsesWith(CI, V); return nullptr; - case Intrinsic::abs: { - Value *IIOperand = II->getArgOperand(0); - bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue(); - - // abs(-x) -> abs(x) - // TODO: Copy nsw if it was present on the neg? - Value *X; - if (match(IIOperand, m_Neg(m_Value(X)))) - return replaceOperand(*II, 0, X); - if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X))))) - return replaceOperand(*II, 0, X); - if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) - return replaceOperand(*II, 0, X); - - if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) { - // abs(x) -> x if x >= 0 - if (!*Sign) - return replaceInstUsesWith(*II, IIOperand); - - // abs(x) -> -x if x < 0 - if (IntMinIsPoison) - return BinaryOperator::CreateNSWNeg(IIOperand); - return BinaryOperator::CreateNeg(IIOperand); - } - - // abs (sext X) --> zext (abs X*) - // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. - if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) { - Value *NarrowAbs = - Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse()); - return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType()); - } - - break; - } - case Intrinsic::umax: - case Intrinsic::umin: { - Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); - Value *X, *Y; - if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) && - (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { - Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); - return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType()); - } - // If both operands of unsigned min/max are sign-extended, it is still ok - // to narrow the operation. - LLVM_FALLTHROUGH; - } - case Intrinsic::smax: - case Intrinsic::smin: { - Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); - Value *X, *Y; - if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) && - (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { - Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); - return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType()); - } - break; - } + case Intrinsic::abs: { + Value *IIOperand = II->getArgOperand(0); + bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue(); + + // abs(-x) -> abs(x) + // TODO: Copy nsw if it was present on the neg? 
+ Value *X; + if (match(IIOperand, m_Neg(m_Value(X)))) + return replaceOperand(*II, 0, X); + if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X))))) + return replaceOperand(*II, 0, X); + if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) + return replaceOperand(*II, 0, X); + + if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) { + // abs(x) -> x if x >= 0 + if (!*Sign) + return replaceInstUsesWith(*II, IIOperand); + + // abs(x) -> -x if x < 0 + if (IntMinIsPoison) + return BinaryOperator::CreateNSWNeg(IIOperand); + return BinaryOperator::CreateNeg(IIOperand); + } + + // abs (sext X) --> zext (abs X*) + // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. + if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) { + Value *NarrowAbs = + Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse()); + return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType()); + } + + break; + } + case Intrinsic::umax: + case Intrinsic::umin: { + Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); + Value *X, *Y; + if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) && + (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { + Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); + return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType()); + } + // If both operands of unsigned min/max are sign-extended, it is still ok + // to narrow the operation. + LLVM_FALLTHROUGH; + } + case Intrinsic::smax: + case Intrinsic::smin: { + Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); + Value *X, *Y; + if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) && + (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { + Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); + return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType()); + } + break; + } case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { - unsigned C = X->getType()->getScalarSizeInBits() - - IIOperand->getType()->getScalarSizeInBits(); + unsigned C = X->getType()->getScalarSizeInBits() - + IIOperand->getType()->getScalarSizeInBits(); Value *CV = ConstantInt::get(X->getType(), C); Value *V = Builder.CreateLShr(X, CV); return new TruncInst(V, IIOperand->getType()); @@ -888,12 +888,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // 0 and 1 are handled in instsimplify // powi(x, -1) -> 1/x if (Power->isMinusOne()) - return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0), - II->getArgOperand(0), II); + return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0), + II->getArgOperand(0), II); // powi(x, 2) -> x*x if (Power->equalsInt(2)) - return BinaryOperator::CreateFMulFMF(II->getArgOperand(0), - II->getArgOperand(0), II); + return BinaryOperator::CreateFMulFMF(II->getArgOperand(0), + II->getArgOperand(0), II); } break; @@ -914,8 +914,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Type *Ty = II->getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); Constant *ShAmtC; - if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) && - !ShAmtC->containsConstantExpression()) { + if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) && + !ShAmtC->containsConstantExpression()) { // Canonicalize a shift amount constant operand to modulo the bit-width. 
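Two of the intrinsic folds above come down to narrow integer identities: abs(sext X) can be narrowed to zext(abs X) once the wrapping (non-poison) abs is used, and bswap(trunc(bswap(x))) is just the high part of x, i.e. trunc(x >> (srcBits - dstBits)). A standalone check of both on i8/i16/i32-sized C++ types, separate from the LLVM code; helper names and the sample constant are illustrative.

#include <cassert>
#include <cstdint>

// abs (sext X) --> zext (abs X), with the narrow abs allowed to wrap
// (abs_i8(-128) wraps to 0x80, which zexts to 128, matching the wide abs).
static uint32_t absSextWide(int8_t X) {
  const int32_t W = X;                                // sext i8 -> i32
  return W < 0 ? (uint32_t)-W : (uint32_t)W;          // abs i32
}
static uint32_t zextAbsNarrow(int8_t X) {
  const uint8_t A = X < 0 ? (uint8_t)(0u - (uint8_t)X) : (uint8_t)X; // wrapping abs i8
  return A;                                                          // zext i8 -> i32
}

// bswap(trunc(bswap(x))) --> trunc(x >> (srcBits - dstBits)), here i32 -> i16.
static uint16_t bswap16(uint16_t V) { return (uint16_t)((V << 8) | (V >> 8)); }
static uint32_t bswap32(uint32_t V) {
  return (V << 24) | ((V & 0xFF00u) << 8) | ((V >> 8) & 0xFF00u) | (V >> 24);
}

int main() {
  for (int X = -128; X <= 127; ++X)
    assert(absSextWide((int8_t)X) == zextAbsNarrow((int8_t)X));
  const uint32_t V = 0xAABBCCDDu;
  assert(bswap16((uint16_t)bswap32(V)) == (uint16_t)(V >> 16));
  return 0;
}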
Constant *WidthC = ConstantInt::get(Ty, BitWidth); Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC); @@ -1259,52 +1259,52 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { break; } case Intrinsic::copysign: { - Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1); - if (SignBitMustBeZero(Sign, &TLI)) { + Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1); + if (SignBitMustBeZero(Sign, &TLI)) { // If we know that the sign argument is positive, reduce to FABS: - // copysign Mag, +Sign --> fabs Mag - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); + // copysign Mag, +Sign --> fabs Mag + Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); return replaceInstUsesWith(*II, Fabs); } // TODO: There should be a ValueTracking sibling like SignBitMustBeOne. const APFloat *C; - if (match(Sign, m_APFloat(C)) && C->isNegative()) { + if (match(Sign, m_APFloat(C)) && C->isNegative()) { // If we know that the sign argument is negative, reduce to FNABS: - // copysign Mag, -Sign --> fneg (fabs Mag) - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); + // copysign Mag, -Sign --> fneg (fabs Mag) + Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II)); } // Propagate sign argument through nested calls: - // copysign Mag, (copysign ?, X) --> copysign Mag, X - Value *X; - if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) - return replaceOperand(*II, 1, X); - - // Peek through changes of magnitude's sign-bit. This call rewrites those: - // copysign (fabs X), Sign --> copysign X, Sign - // copysign (fneg X), Sign --> copysign X, Sign - if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X)))) - return replaceOperand(*II, 0, X); - + // copysign Mag, (copysign ?, X) --> copysign Mag, X + Value *X; + if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) + return replaceOperand(*II, 1, X); + + // Peek through changes of magnitude's sign-bit. 
This call rewrites those: + // copysign (fabs X), Sign --> copysign X, Sign + // copysign (fneg X), Sign --> copysign X, Sign + if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X)))) + return replaceOperand(*II, 0, X); + break; } case Intrinsic::fabs: { - Value *Cond, *TVal, *FVal; + Value *Cond, *TVal, *FVal; if (match(II->getArgOperand(0), - m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) { - // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF - if (isa<Constant>(TVal) && isa<Constant>(FVal)) { - CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal}); - CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal}); - return SelectInst::Create(Cond, AbsT, AbsF); - } - // fabs (select Cond, -FVal, FVal) --> fabs FVal - if (match(TVal, m_FNeg(m_Specific(FVal)))) - return replaceOperand(*II, 0, FVal); - // fabs (select Cond, TVal, -TVal) --> fabs TVal - if (match(FVal, m_FNeg(m_Specific(TVal)))) - return replaceOperand(*II, 0, TVal); + m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) { + // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF + if (isa<Constant>(TVal) && isa<Constant>(FVal)) { + CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal}); + CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal}); + return SelectInst::Create(Cond, AbsT, AbsF); + } + // fabs (select Cond, -FVal, FVal) --> fabs FVal + if (match(TVal, m_FNeg(m_Specific(FVal)))) + return replaceOperand(*II, 0, FVal); + // fabs (select Cond, TVal, -TVal) --> fabs TVal + if (match(FVal, m_FNeg(m_Specific(TVal)))) + return replaceOperand(*II, 0, TVal); } LLVM_FALLTHROUGH; @@ -1491,16 +1491,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { break; case Intrinsic::assume: { Value *IIOperand = II->getArgOperand(0); - SmallVector<OperandBundleDef, 4> OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - bool HasOpBundles = !OpBundles.empty(); + SmallVector<OperandBundleDef, 4> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + bool HasOpBundles = !OpBundles.empty(); // Remove an assume if it is followed by an identical assume. // TODO: Do we need this? Unless there are conflicting assumptions, the // computeKnownBits(IIOperand) below here eliminates redundant assumes. 
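The copysign folds above only touch the sign bit, so they can be spot-checked with the standard library equivalents. A standalone sketch, separate from the LLVM code; IEEE-754 doubles are assumed and the sample magnitudes and sign constants are illustrative.

#include <cassert>
#include <cmath>

int main() {
  const double Mags[] = {0.0, -0.0, 1.5, -2.25, 1e300};
  for (double Mag : Mags) {
    // copysign Mag, +Sign --> fabs Mag        (sign bit of Sign known zero)
    assert(std::copysign(Mag, 42.0) == std::fabs(Mag));
    // copysign Mag, -Sign --> fneg (fabs Mag) (sign constant known negative)
    assert(std::copysign(Mag, -3.0) == -std::fabs(Mag));
    // copysign Mag, (copysign ?, X) --> copysign Mag, X
    assert(std::copysign(Mag, std::copysign(7.0, -0.5)) == std::copysign(Mag, -0.5));
    // copysign (fabs Mag), Sign and copysign (fneg Mag), Sign --> copysign Mag, Sign
    assert(std::copysign(std::fabs(Mag), -0.5) == std::copysign(Mag, -0.5));
    assert(std::copysign(-Mag, -0.5) == std::copysign(Mag, -0.5));
  }
  return 0;
}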
Instruction *Next = II->getNextNonDebugInstruction(); - if (HasOpBundles && - match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) && - !cast<IntrinsicInst>(Next)->hasOperandBundles()) + if (HasOpBundles && + match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) && + !cast<IntrinsicInst>(Next)->hasOperandBundles()) return eraseInstFromFunction(CI); // Canonicalize assume(a && b) -> assume(a); assume(b); @@ -1509,16 +1509,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { FunctionType *AssumeIntrinsicTy = II->getFunctionType(); Value *AssumeIntrinsic = II->getCalledOperand(); Value *A, *B; - if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) { - Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, - II->getName()); + if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) { + Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, + II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); - if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) { + if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) { Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, - Builder.CreateNot(A), OpBundles, II->getName()); + Builder.CreateNot(A), OpBundles, II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, Builder.CreateNot(B), II->getName()); return eraseInstFromFunction(*II); @@ -1534,8 +1534,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { isValidAssumeForContext(II, LHS, &DT)) { MDNode *MD = MDNode::get(II->getContext(), None); LHS->setMetadata(LLVMContext::MD_nonnull, MD); - if (!HasOpBundles) - return eraseInstFromFunction(*II); + if (!HasOpBundles) + return eraseInstFromFunction(*II); // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? @@ -1553,102 +1553,102 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { AC.updateAffectedValues(II); break; } - case Intrinsic::experimental_gc_statepoint: { - GCStatepointInst &GCSP = *cast<GCStatepointInst>(II); - SmallPtrSet<Value *, 32> LiveGcValues; - for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { - GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); - - // Remove the relocation if unused. - if (GCR.use_empty()) { - eraseInstFromFunction(GCR); - continue; + case Intrinsic::experimental_gc_statepoint: { + GCStatepointInst &GCSP = *cast<GCStatepointInst>(II); + SmallPtrSet<Value *, 32> LiveGcValues; + for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { + GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); + + // Remove the relocation if unused. + if (GCR.use_empty()) { + eraseInstFromFunction(GCR); + continue; + } + + Value *DerivedPtr = GCR.getDerivedPtr(); + Value *BasePtr = GCR.getBasePtr(); + + // Undef is undef, even after relocation. + if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) { + replaceInstUsesWith(GCR, UndefValue::get(GCR.getType())); + eraseInstFromFunction(GCR); + continue; + } + + if (auto *PT = dyn_cast<PointerType>(GCR.getType())) { + // The relocation of null will be null for most any collector. + // TODO: provide a hook for this in GCStrategy. There might be some + // weird collector this property does not hold for. + if (isa<ConstantPointerNull>(DerivedPtr)) { + // Use null-pointer of gc_relocate's type to replace it. 
+ replaceInstUsesWith(GCR, ConstantPointerNull::get(PT)); + eraseInstFromFunction(GCR); + continue; + } + + // isKnownNonNull -> nonnull attribute + if (!GCR.hasRetAttr(Attribute::NonNull) && + isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) { + GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + // We discovered new fact, re-check users. + Worklist.pushUsersToWorkList(GCR); + } + } + + // If we have two copies of the same pointer in the statepoint argument + // list, canonicalize to one. This may let us common gc.relocates. + if (GCR.getBasePtr() == GCR.getDerivedPtr() && + GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { + auto *OpIntTy = GCR.getOperand(2)->getType(); + GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); } - Value *DerivedPtr = GCR.getDerivedPtr(); - Value *BasePtr = GCR.getBasePtr(); - - // Undef is undef, even after relocation. - if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) { - replaceInstUsesWith(GCR, UndefValue::get(GCR.getType())); - eraseInstFromFunction(GCR); - continue; - } - - if (auto *PT = dyn_cast<PointerType>(GCR.getType())) { - // The relocation of null will be null for most any collector. - // TODO: provide a hook for this in GCStrategy. There might be some - // weird collector this property does not hold for. - if (isa<ConstantPointerNull>(DerivedPtr)) { - // Use null-pointer of gc_relocate's type to replace it. - replaceInstUsesWith(GCR, ConstantPointerNull::get(PT)); - eraseInstFromFunction(GCR); - continue; - } - - // isKnownNonNull -> nonnull attribute - if (!GCR.hasRetAttr(Attribute::NonNull) && - isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) { - GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); - // We discovered new fact, re-check users. - Worklist.pushUsersToWorkList(GCR); - } - } - - // If we have two copies of the same pointer in the statepoint argument - // list, canonicalize to one. This may let us common gc.relocates. - if (GCR.getBasePtr() == GCR.getDerivedPtr() && - GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { - auto *OpIntTy = GCR.getOperand(2)->getType(); - GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); - } - - // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) - // Canonicalize on the type from the uses to the defs - - // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) - LiveGcValues.insert(BasePtr); - LiveGcValues.insert(DerivedPtr); - } - Optional<OperandBundleUse> Bundle = - GCSP.getOperandBundle(LLVMContext::OB_gc_live); - unsigned NumOfGCLives = LiveGcValues.size(); - if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size()) - break; - // We can reduce the size of gc live bundle. 
- DenseMap<Value *, unsigned> Val2Idx; - std::vector<Value *> NewLiveGc; - for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) { - Value *V = Bundle->Inputs[I]; - if (Val2Idx.count(V)) - continue; - if (LiveGcValues.count(V)) { - Val2Idx[V] = NewLiveGc.size(); - NewLiveGc.push_back(V); - } else - Val2Idx[V] = NumOfGCLives; - } - // Update all gc.relocates - for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { - GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); - Value *BasePtr = GCR.getBasePtr(); - assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && - "Missed live gc for base pointer"); - auto *OpIntTy1 = GCR.getOperand(1)->getType(); - GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr])); - Value *DerivedPtr = GCR.getDerivedPtr(); - assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && - "Missed live gc for derived pointer"); - auto *OpIntTy2 = GCR.getOperand(2)->getType(); - GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr])); - } - // Create new statepoint instruction. - OperandBundleDef NewBundle("gc-live", NewLiveGc); - if (isa<CallInst>(II)) - return CallInst::CreateWithReplacedBundle(cast<CallInst>(II), NewBundle); - else - return InvokeInst::CreateWithReplacedBundle(cast<InvokeInst>(II), - NewBundle); + // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) + // Canonicalize on the type from the uses to the defs + + // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) + LiveGcValues.insert(BasePtr); + LiveGcValues.insert(DerivedPtr); + } + Optional<OperandBundleUse> Bundle = + GCSP.getOperandBundle(LLVMContext::OB_gc_live); + unsigned NumOfGCLives = LiveGcValues.size(); + if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size()) + break; + // We can reduce the size of gc live bundle. + DenseMap<Value *, unsigned> Val2Idx; + std::vector<Value *> NewLiveGc; + for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) { + Value *V = Bundle->Inputs[I]; + if (Val2Idx.count(V)) + continue; + if (LiveGcValues.count(V)) { + Val2Idx[V] = NewLiveGc.size(); + NewLiveGc.push_back(V); + } else + Val2Idx[V] = NumOfGCLives; + } + // Update all gc.relocates + for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { + GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); + Value *BasePtr = GCR.getBasePtr(); + assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && + "Missed live gc for base pointer"); + auto *OpIntTy1 = GCR.getOperand(1)->getType(); + GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr])); + Value *DerivedPtr = GCR.getDerivedPtr(); + assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && + "Missed live gc for derived pointer"); + auto *OpIntTy2 = GCR.getOperand(2)->getType(); + GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr])); + } + // Create new statepoint instruction. 
+ OperandBundleDef NewBundle("gc-live", NewLiveGc); + if (isa<CallInst>(II)) + return CallInst::CreateWithReplacedBundle(cast<CallInst>(II), NewBundle); + else + return InvokeInst::CreateWithReplacedBundle(cast<InvokeInst>(II), + NewBundle); break; } case Intrinsic::experimental_guard: { @@ -1683,114 +1683,114 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } break; } - case Intrinsic::experimental_vector_insert: { - Value *Vec = II->getArgOperand(0); - Value *SubVec = II->getArgOperand(1); - Value *Idx = II->getArgOperand(2); - auto *DstTy = dyn_cast<FixedVectorType>(II->getType()); - auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); - auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType()); - - // Only canonicalize if the destination vector, Vec, and SubVec are all - // fixed vectors. - if (DstTy && VecTy && SubVecTy) { - unsigned DstNumElts = DstTy->getNumElements(); - unsigned VecNumElts = VecTy->getNumElements(); - unsigned SubVecNumElts = SubVecTy->getNumElements(); - unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); - - // The result of this call is undefined if IdxN is not a constant multiple - // of the SubVec's minimum vector length OR the insertion overruns Vec. - if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) { - replaceInstUsesWith(CI, UndefValue::get(CI.getType())); - return eraseInstFromFunction(CI); - } - - // An insert that entirely overwrites Vec with SubVec is a nop. - if (VecNumElts == SubVecNumElts) { - replaceInstUsesWith(CI, SubVec); - return eraseInstFromFunction(CI); - } - - // Widen SubVec into a vector of the same width as Vec, since - // shufflevector requires the two input vectors to be the same width. - // Elements beyond the bounds of SubVec within the widened vector are - // undefined. - SmallVector<int, 8> WidenMask; - unsigned i; - for (i = 0; i != SubVecNumElts; ++i) - WidenMask.push_back(i); - for (; i != VecNumElts; ++i) - WidenMask.push_back(UndefMaskElem); - - Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); - - SmallVector<int, 8> Mask; - for (unsigned i = 0; i != IdxN; ++i) - Mask.push_back(i); - for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) - Mask.push_back(i); - for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) - Mask.push_back(i); - - Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); - replaceInstUsesWith(CI, Shuffle); - return eraseInstFromFunction(CI); - } - break; - } - case Intrinsic::experimental_vector_extract: { - Value *Vec = II->getArgOperand(0); - Value *Idx = II->getArgOperand(1); - - auto *DstTy = dyn_cast<FixedVectorType>(II->getType()); - auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); - - // Only canonicalize if the the destination vector and Vec are fixed - // vectors. - if (DstTy && VecTy) { - unsigned DstNumElts = DstTy->getNumElements(); - unsigned VecNumElts = VecTy->getNumElements(); - unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); - - // The result of this call is undefined if IdxN is not a constant multiple - // of the result type's minimum vector length OR the extraction overruns - // Vec. - if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) { - replaceInstUsesWith(CI, UndefValue::get(CI.getType())); - return eraseInstFromFunction(CI); - } - - // Extracting the entirety of Vec is a nop. 
- if (VecNumElts == DstNumElts) { - replaceInstUsesWith(CI, Vec); - return eraseInstFromFunction(CI); - } - - SmallVector<int, 8> Mask; - for (unsigned i = 0; i != DstNumElts; ++i) - Mask.push_back(IdxN + i); - - Value *Shuffle = - Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask); - replaceInstUsesWith(CI, Shuffle); - return eraseInstFromFunction(CI); - } - break; - } - default: { - // Handle target specific intrinsics - Optional<Instruction *> V = targetInstCombineIntrinsic(*II); - if (V.hasValue()) - return V.getValue(); - break; - } - } + case Intrinsic::experimental_vector_insert: { + Value *Vec = II->getArgOperand(0); + Value *SubVec = II->getArgOperand(1); + Value *Idx = II->getArgOperand(2); + auto *DstTy = dyn_cast<FixedVectorType>(II->getType()); + auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); + auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType()); + + // Only canonicalize if the destination vector, Vec, and SubVec are all + // fixed vectors. + if (DstTy && VecTy && SubVecTy) { + unsigned DstNumElts = DstTy->getNumElements(); + unsigned VecNumElts = VecTy->getNumElements(); + unsigned SubVecNumElts = SubVecTy->getNumElements(); + unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); + + // The result of this call is undefined if IdxN is not a constant multiple + // of the SubVec's minimum vector length OR the insertion overruns Vec. + if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) { + replaceInstUsesWith(CI, UndefValue::get(CI.getType())); + return eraseInstFromFunction(CI); + } + + // An insert that entirely overwrites Vec with SubVec is a nop. + if (VecNumElts == SubVecNumElts) { + replaceInstUsesWith(CI, SubVec); + return eraseInstFromFunction(CI); + } + + // Widen SubVec into a vector of the same width as Vec, since + // shufflevector requires the two input vectors to be the same width. + // Elements beyond the bounds of SubVec within the widened vector are + // undefined. + SmallVector<int, 8> WidenMask; + unsigned i; + for (i = 0; i != SubVecNumElts; ++i) + WidenMask.push_back(i); + for (; i != VecNumElts; ++i) + WidenMask.push_back(UndefMaskElem); + + Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); + + SmallVector<int, 8> Mask; + for (unsigned i = 0; i != IdxN; ++i) + Mask.push_back(i); + for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) + Mask.push_back(i); + for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) + Mask.push_back(i); + + Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); + replaceInstUsesWith(CI, Shuffle); + return eraseInstFromFunction(CI); + } + break; + } + case Intrinsic::experimental_vector_extract: { + Value *Vec = II->getArgOperand(0); + Value *Idx = II->getArgOperand(1); + + auto *DstTy = dyn_cast<FixedVectorType>(II->getType()); + auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); + + // Only canonicalize if the the destination vector and Vec are fixed + // vectors. + if (DstTy && VecTy) { + unsigned DstNumElts = DstTy->getNumElements(); + unsigned VecNumElts = VecTy->getNumElements(); + unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); + + // The result of this call is undefined if IdxN is not a constant multiple + // of the result type's minimum vector length OR the extraction overruns + // Vec. + if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) { + replaceInstUsesWith(CI, UndefValue::get(CI.getType())); + return eraseInstFromFunction(CI); + } + + // Extracting the entirety of Vec is a nop. 
+ if (VecNumElts == DstNumElts) { + replaceInstUsesWith(CI, Vec); + return eraseInstFromFunction(CI); + } + + SmallVector<int, 8> Mask; + for (unsigned i = 0; i != DstNumElts; ++i) + Mask.push_back(IdxN + i); + + Value *Shuffle = + Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask); + replaceInstUsesWith(CI, Shuffle); + return eraseInstFromFunction(CI); + } + break; + } + default: { + // Handle target specific intrinsics + Optional<Instruction *> V = targetInstCombineIntrinsic(*II); + if (V.hasValue()) + return V.getValue(); + break; + } + } return visitCallBase(*II); } // Fence instruction simplification -Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { +Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { // Remove identical consecutive fences. Instruction *Next = FI.getNextNonDebugInstruction(); if (auto *NFI = dyn_cast<FenceInst>(Next)) @@ -1800,12 +1800,12 @@ Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { } // InvokeInst simplification -Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) { +Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) { return visitCallBase(II); } // CallBrInst simplification -Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { +Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { return visitCallBase(CBI); } @@ -1845,7 +1845,7 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call, return true; } -Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { +Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; auto InstCombineRAUW = [this](Instruction *From, Value *With) { @@ -2002,7 +2002,7 @@ static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) { } /// Improvements for call, callbr and invoke instructions. -Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { +Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { if (isAllocationFn(&Call, &TLI)) annotateAnyAllocSite(Call, &TLI); @@ -2058,7 +2058,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { !CalleeF->isDeclaration()) { Instruction *OldCall = &Call; CreateNonTerminatorUnreachable(OldCall); - // If OldCall does not return void then replaceInstUsesWith undef. + // If OldCall does not return void then replaceInstUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); @@ -2077,7 +2077,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { if ((isa<ConstantPointerNull>(Callee) && !NullPointerIsDefined(Call.getFunction())) || isa<UndefValue>(Callee)) { - // If Call does not return void then replaceInstUsesWith undef. + // If Call does not return void then replaceInstUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!Call.getType()->isVoidTy()) replaceInstUsesWith(Call, UndefValue::get(Call.getType())); @@ -2153,7 +2153,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { /// If the callee is a constexpr cast of a function, attempt to move the cast to /// the arguments of the call/callbr/invoke. 
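A rough IR sketch of the experimental_vector_insert/extract canonicalization above, for fixed-width types only; the concrete types (<4 x i32>, <2 x i32>) and index 2 are illustrative, and the intrinsic call is abbreviated without its full mangled name:

    ; insert <2 x i32> %sub into <4 x i32> %vec at index 2
    ; (roughly: %r = call <4 x i32> @llvm.experimental.vector.insert(%vec, %sub, i64 2))
    %widen = shufflevector <2 x i32> %sub, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    %r = shufflevector <4 x i32> %vec, <4 x i32> %widen, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    ; extract a <2 x i32> from %vec at index 2 becomes a plain subvector shuffle
    %e = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32> <i32 2, i32 3>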
-bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { +bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { auto *Callee = dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts()); if (!Callee) @@ -2252,9 +2252,9 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { if (Call.isInAllocaArgument(i)) return false; // Cannot transform to and from inalloca. - if (CallerPAL.hasParamAttribute(i, Attribute::SwiftError)) - return false; - + if (CallerPAL.hasParamAttribute(i, Attribute::SwiftError)) + return false; + // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) { @@ -2440,8 +2440,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { /// Turn a call to a function created by init_trampoline / adjust_trampoline /// intrinsic pair into a direct call to the underlying function. Instruction * -InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, - IntrinsicInst &Tramp) { +InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, + IntrinsicInst &Tramp) { Value *Callee = Call.getCalledOperand(); Type *CalleeTy = Callee->getType(); FunctionType *FTy = Call.getFunctionType(); diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCasts.cpp index 07e68c4441..2b490d5084 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,11 +14,11 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include <numeric> using namespace llvm; using namespace PatternMatch; @@ -82,8 +82,8 @@ static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale, /// If we find a cast of an allocation instruction, try to eliminate the cast by /// moving the type information into the alloc. -Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, - AllocaInst &AI) { +Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, + AllocaInst &AI) { PointerType *PTy = cast<PointerType>(CI.getType()); IRBuilderBase::InsertPointGuard Guard(Builder); @@ -94,18 +94,18 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; - // This optimisation does not work for cases where the cast type - // is scalable and the allocated type is not. This because we need to - // know how many times the casted type fits into the allocated type. - // For the opposite case where the allocated type is scalable and the - // cast type is not this leads to poor code quality due to the - // introduction of 'vscale' into the calculations. It seems better to - // bail out for this case too until we've done a proper cost-benefit - // analysis. 
- bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy); - bool CastIsScalable = isa<ScalableVectorType>(CastElTy); - if (AllocIsScalable != CastIsScalable) return nullptr; - + // This optimisation does not work for cases where the cast type + // is scalable and the allocated type is not. This because we need to + // know how many times the casted type fits into the allocated type. + // For the opposite case where the allocated type is scalable and the + // cast type is not this leads to poor code quality due to the + // introduction of 'vscale' into the calculations. It seems better to + // bail out for this case too until we've done a proper cost-benefit + // analysis. + bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy); + bool CastIsScalable = isa<ScalableVectorType>(CastElTy); + if (AllocIsScalable != CastIsScalable) return nullptr; + Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy); Align CastElTyAlign = DL.getABITypeAlign(CastElTy); if (CastElTyAlign < AllocElTyAlign) return nullptr; @@ -115,15 +115,15 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, // same, we open the door to infinite loops of various kinds. if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; - // The alloc and cast types should be either both fixed or both scalable. - uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize(); - uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize(); + // The alloc and cast types should be either both fixed or both scalable. + uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize(); + uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize(); if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; // If the allocation has multiple uses, only promote it if we're not // shrinking the amount of memory being allocated. - uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize(); - uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize(); + uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize(); + uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize(); if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array @@ -138,9 +138,9 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr; - // We don't currently support arrays of scalable types. - assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0)); - + // We don't currently support arrays of scalable types. + assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0)); + unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; Value *Amt = nullptr; if (Scale == 1) { @@ -177,8 +177,8 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns /// true for, actually insert the code to evaluate the expression. -Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty, - bool isSigned) { +Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty, + bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // If we got a constantexpr back, try to simplify it with DL info. 
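As a small sketch of the alloca promotion that the scalable-type bailout above now guards, assuming fixed-size types and illustrative names:

    ; before: the bitcast carries the type information
    %p  = alloca i64, align 8
    %pc = bitcast i64* %p to i32*
    ; after: one i64 holds exactly two i32s, so the type moves into the alloca
    %p2 = alloca i32, i32 2, align 8
    ; uses of %pc switch to %p2; any remaining i64* uses of %p get a cast of %p2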
@@ -246,9 +246,9 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty, return InsertNewInstWith(Res, *I); } -Instruction::CastOps -InstCombinerImpl::isEliminableCastPair(const CastInst *CI1, - const CastInst *CI2) { +Instruction::CastOps +InstCombinerImpl::isEliminableCastPair(const CastInst *CI1, + const CastInst *CI2) { Type *SrcTy = CI1->getSrcTy(); Type *MidTy = CI1->getDestTy(); Type *DstTy = CI2->getDestTy(); @@ -275,7 +275,7 @@ InstCombinerImpl::isEliminableCastPair(const CastInst *CI1, } /// Implement the transforms common to all CastInst visitors. -Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { +Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); // Try to eliminate a cast of a cast. @@ -360,7 +360,7 @@ static bool canNotEvaluateInType(Value *V, Type *Ty) { /// /// This function works on both vectors and scalars. /// -static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, +static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI) { if (canAlwaysEvaluateInType(V, Ty)) return true; @@ -477,8 +477,8 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 /// ---> /// extractelement <4 x i32> %X, 1 -static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, - InstCombinerImpl &IC) { +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, + InstCombinerImpl &IC) { Value *TruncOp = Trunc.getOperand(0); Type *DestType = Trunc.getType(); if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType)) @@ -515,9 +515,9 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt)); } -/// Funnel/Rotate left/right may occur in a wider type than necessary because of -/// type promotion rules. Try to narrow the inputs and convert to funnel shift. -Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { +/// Funnel/Rotate left/right may occur in a wider type than necessary because of +/// type promotion rules. Try to narrow the inputs and convert to funnel shift. 
+Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { assert((isa<VectorType>(Trunc.getSrcTy()) || shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) && "Don't narrow to an illegal scalar type"); @@ -529,43 +529,43 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { if (!isPowerOf2_32(NarrowWidth)) return nullptr; - // First, find an or'd pair of opposite shifts: - // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)) - BinaryOperator *Or0, *Or1; - if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1))))) + // First, find an or'd pair of opposite shifts: + // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)) + BinaryOperator *Or0, *Or1; + if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1))))) return nullptr; - Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; - if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || - !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) || - Or0->getOpcode() == Or1->getOpcode()) + Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; + if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || + !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) || + Or0->getOpcode() == Or1->getOpcode()) return nullptr; - // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). - if (Or0->getOpcode() == BinaryOperator::LShr) { - std::swap(Or0, Or1); - std::swap(ShVal0, ShVal1); - std::swap(ShAmt0, ShAmt1); - } - assert(Or0->getOpcode() == BinaryOperator::Shl && - Or1->getOpcode() == BinaryOperator::LShr && - "Illegal or(shift,shift) pair"); - - // Match the shift amount operands for a funnel/rotate pattern. This always - // matches a subtraction on the R operand. - auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { + // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). + if (Or0->getOpcode() == BinaryOperator::LShr) { + std::swap(Or0, Or1); + std::swap(ShVal0, ShVal1); + std::swap(ShAmt0, ShAmt1); + } + assert(Or0->getOpcode() == BinaryOperator::Shl && + Or1->getOpcode() == BinaryOperator::LShr && + "Illegal or(shift,shift) pair"); + + // Match the shift amount operands for a funnel/rotate pattern. This always + // matches a subtraction on the R operand. + auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { // The shift amounts may add up to the narrow bit width: - // (shl ShVal0, L) | (lshr ShVal1, Width - L) + // (shl ShVal0, L) | (lshr ShVal1, Width - L) if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) return L; - // The following patterns currently only work for rotation patterns. - // TODO: Add more general funnel-shift compatible patterns. - if (ShVal0 != ShVal1) - return nullptr; - + // The following patterns currently only work for rotation patterns. + // TODO: Add more general funnel-shift compatible patterns. + if (ShVal0 != ShVal1) + return nullptr; + // The shift amount may be masked with negation: - // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1))) + // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1))) Value *X; unsigned Mask = Width - 1; if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) && @@ -581,10 +581,10 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { }; Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth); - bool IsFshl = true; // Sub on LSHR. + bool IsFshl = true; // Sub on LSHR. 
if (!ShAmt) { ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth); - IsFshl = false; // Sub on SHL. + IsFshl = false; // Sub on SHL. } if (!ShAmt) return nullptr; @@ -593,28 +593,28 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { // will be a zext, but it could also be the result of an 'and' or 'shift'. unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits(); APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth); - if (!MaskedValueIsZero(ShVal0, HiBitMask, 0, &Trunc) || - !MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc)) + if (!MaskedValueIsZero(ShVal0, HiBitMask, 0, &Trunc) || + !MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc)) return nullptr; // We have an unnecessarily wide rotate! - // trunc (or (lshr ShVal0, ShAmt), (shl ShVal1, BitWidth - ShAmt)) + // trunc (or (lshr ShVal0, ShAmt), (shl ShVal1, BitWidth - ShAmt)) // Narrow the inputs and convert to funnel shift intrinsic: // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt)) Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy); - Value *X, *Y; - X = Y = Builder.CreateTrunc(ShVal0, DestTy); - if (ShVal0 != ShVal1) - Y = Builder.CreateTrunc(ShVal1, DestTy); + Value *X, *Y; + X = Y = Builder.CreateTrunc(ShVal0, DestTy); + if (ShVal0 != ShVal1) + Y = Builder.CreateTrunc(ShVal1, DestTy); Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy); - return IntrinsicInst::Create(F, {X, Y, NarrowShAmt}); + return IntrinsicInst::Create(F, {X, Y, NarrowShAmt}); } /// Try to narrow the width of math or bitwise logic instructions by pulling a /// truncate ahead of binary operators. /// TODO: Transforms for truncated shifts should be moved into here. -Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { +Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { Type *SrcTy = Trunc.getSrcTy(); Type *DestTy = Trunc.getType(); if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy)) @@ -663,7 +663,7 @@ Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { default: break; } - if (Instruction *NarrowOr = narrowFunnelShift(Trunc)) + if (Instruction *NarrowOr = narrowFunnelShift(Trunc)) return NarrowOr; return nullptr; @@ -719,7 +719,7 @@ static Instruction *shrinkInsertElt(CastInst &Trunc, return nullptr; } -Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { +Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { if (Instruction *Result = commonCastTransforms(Trunc)) return Result; @@ -813,60 +813,60 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { } } - Value *A; - Constant *C; - if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) { + Value *A; + Constant *C; + if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) { unsigned AWidth = A->getType()->getScalarSizeInBits(); unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth); - auto *OldSh = cast<Instruction>(Src); - bool IsExact = OldSh->isExact(); + auto *OldSh = cast<Instruction>(Src); + bool IsExact = OldSh->isExact(); // If the shift is small enough, all zero bits created by the shift are // removed by the trunc. 
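A sketch of the narrowing done by narrowFunnelShift, assuming a 16-bit rotate written in i32 where the high bits of the rotated value are known zero (the zext supplies that fact here):

    ; before: i16 rotate-left performed in i32 and truncated back
    %x    = zext i16 %v to i32
    %shl  = shl i32 %x, %amt
    %diff = sub i32 16, %amt
    %lshr = lshr i32 %x, %diff
    %or   = or i32 %shl, %lshr
    %r    = trunc i32 %or to i16
    ; after: narrowed to a funnel-shift intrinsic (the trunc of %x folds back to %v)
    %amt16 = trunc i32 %amt to i16
    %r2    = call i16 @llvm.fshl.i16(i16 %v, i16 %v, i16 %amt16)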
- if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, - APInt(SrcWidth, MaxShiftAmt)))) { + if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, + APInt(SrcWidth, MaxShiftAmt)))) { // trunc (lshr (sext A), C) --> ashr A, C if (A->getType() == DestTy) { - Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false); - Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); - ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); - ShAmt = Constant::mergeUndefsWith(ShAmt, C); - return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt) - : BinaryOperator::CreateAShr(A, ShAmt); + Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false); + Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); + ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); + ShAmt = Constant::mergeUndefsWith(ShAmt, C); + return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt) + : BinaryOperator::CreateAShr(A, ShAmt); } // The types are mismatched, so create a cast after shifting: // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C) if (Src->hasOneUse()) { - Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false); - Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); - ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); - Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact); + Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false); + Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); + ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); + Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact); return CastInst::CreateIntegerCast(Shift, DestTy, true); } } // TODO: Mask high bits with 'and'. } - // trunc (*shr (trunc A), C) --> trunc(*shr A, C) - if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) { - unsigned MaxShiftAmt = SrcWidth - DestWidth; - - // If the shift is small enough, all zero/sign bits created by the shift are - // removed by the trunc. - if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, - APInt(SrcWidth, MaxShiftAmt)))) { - auto *OldShift = cast<Instruction>(Src); - bool IsExact = OldShift->isExact(); - auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true); - ShAmt = Constant::mergeUndefsWith(ShAmt, C); - Value *Shift = - OldShift->getOpcode() == Instruction::AShr - ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) - : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); - return CastInst::CreateTruncOrBitCast(Shift, DestTy); - } - } - + // trunc (*shr (trunc A), C) --> trunc(*shr A, C) + if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) { + unsigned MaxShiftAmt = SrcWidth - DestWidth; + + // If the shift is small enough, all zero/sign bits created by the shift are + // removed by the trunc. + if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, + APInt(SrcWidth, MaxShiftAmt)))) { + auto *OldShift = cast<Instruction>(Src); + bool IsExact = OldShift->isExact(); + auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true); + ShAmt = Constant::mergeUndefsWith(ShAmt, C); + Value *Shift = + OldShift->getOpcode() == Instruction::AShr + ? 
Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) + : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); + return CastInst::CreateTruncOrBitCast(Shift, DestTy); + } + } + if (Instruction *I = narrowBinOp(Trunc)) return I; @@ -876,19 +876,19 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { if (Instruction *I = shrinkInsertElt(Trunc, Builder)) return I; - if (Src->hasOneUse() && - (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) { + if (Src->hasOneUse() && + (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) { // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the // dest type is native and cst < dest size. - if (match(Src, m_Shl(m_Value(A), m_Constant(C))) && + if (match(Src, m_Shl(m_Value(A), m_Constant(C))) && !match(A, m_Shr(m_Value(), m_Constant()))) { // Skip shifts of shift by constants. It undoes a combine in // FoldShiftByConstant and is the extend in reg pattern. - APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth); - if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) { + APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth); + if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) { Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr"); - return BinaryOperator::Create(Instruction::Shl, NewTrunc, - ConstantExpr::getTrunc(C, DestTy)); + return BinaryOperator::Create(Instruction::Shl, NewTrunc, + ConstantExpr::getTrunc(C, DestTy)); } } } @@ -905,23 +905,23 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { // ---> // extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0 Value *VecOp; - ConstantInt *Cst; + ConstantInt *Cst; if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) { auto *VecOpTy = cast<VectorType>(VecOp->getType()); - auto VecElts = VecOpTy->getElementCount(); + auto VecElts = VecOpTy->getElementCount(); // A badly fit destination size would result in an invalid cast. if (SrcWidth % DestWidth == 0) { uint64_t TruncRatio = SrcWidth / DestWidth; - uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio; + uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio; uint64_t VecOpIdx = Cst->getZExtValue(); uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1 : VecOpIdx * TruncRatio; assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits"); - auto *BitCastTo = - VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable()); + auto *BitCastTo = + VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable()); Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo); return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx)); } @@ -930,8 +930,8 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { return nullptr; } -Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext, - bool DoTransform) { +Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext, + bool DoTransform) { // If we are just checking for a icmp eq of a single bit and zext'ing it // to an integer, then shift the bit to the appropriate place and then // cast to integer to avoid the comparison. @@ -1067,7 +1067,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext, /// /// This function works on both vectors and scalars. 
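Concrete instances of the two shift-through-trunc folds above, with illustrative widths (i32 value, i64 intermediate, shift by 8):

    ; trunc (lshr (sext A), C) --> ashr A, C   (source and destination types already match)
    %s = sext i32 %a to i64
    %l = lshr i64 %s, 8
    %t = trunc i64 %l to i32        ; becomes: %t = ashr i32 %a, 8
    ; trunc (lshr (trunc A), C) --> trunc (lshr A, C)
    %t1 = trunc i64 %w to i32
    %l1 = lshr i32 %t1, 8
    %t2 = trunc i32 %l1 to i16      ; becomes: %l2 = lshr i64 %w, 8 followed by trunc i64 %l2 to i16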
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, - InstCombinerImpl &IC, Instruction *CxtI) { + InstCombinerImpl &IC, Instruction *CxtI) { BitsToClear = 0; if (canAlwaysEvaluateInType(V, Ty)) return true; @@ -1172,7 +1172,7 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, } } -Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { +Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back())) @@ -1270,7 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && - LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) @@ -1311,8 +1311,8 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { } /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp. -Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI, - Instruction &CI) { +Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI, + Instruction &CI) { Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); ICmpInst::Predicate Pred = ICI->getPredicate(); @@ -1448,7 +1448,7 @@ static bool canEvaluateSExtd(Value *V, Type *Ty) { return false; } -Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) { +Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) { // If this sign extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back())) @@ -1511,28 +1511,28 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) { // for a truncate. If the source and dest are the same type, eliminate the // trunc and extend and just do shifts. For example, turn: // %a = trunc i32 %i to i8 - // %b = shl i8 %a, C - // %c = ashr i8 %b, C + // %b = shl i8 %a, C + // %c = ashr i8 %b, C // %d = sext i8 %c to i32 // into: - // %a = shl i32 %i, 32-(8-C) - // %d = ashr i32 %a, 32-(8-C) + // %a = shl i32 %i, 32-(8-C) + // %d = ashr i32 %a, 32-(8-C) Value *A = nullptr; // TODO: Eventually this could be subsumed by EvaluateInDifferentType. 
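For the transformZExtICmp path above, the classic single-bit case looks roughly like this (bit 3 and i32 are illustrative choices):

    ; zext of a single-bit test becomes shift + mask, avoiding the compare
    %and = and i32 %x, 8
    %cmp = icmp ne i32 %and, 0
    %z   = zext i1 %cmp to i32
    ; becomes
    %sh  = lshr i32 %x, 3
    %z2  = and i32 %sh, 1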
Constant *BA = nullptr, *CA = nullptr; if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)), m_Constant(CA))) && - BA->isElementWiseEqual(CA) && A->getType() == DestTy) { - Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy); - Constant *NumLowbitsLeft = ConstantExpr::getSub( - ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt); - Constant *NewShAmt = ConstantExpr::getSub( - ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()), - NumLowbitsLeft); - NewShAmt = - Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA); - A = Builder.CreateShl(A, NewShAmt, CI.getName()); - return BinaryOperator::CreateAShr(A, NewShAmt); + BA->isElementWiseEqual(CA) && A->getType() == DestTy) { + Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy); + Constant *NumLowbitsLeft = ConstantExpr::getSub( + ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt); + Constant *NewShAmt = ConstantExpr::getSub( + ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()), + NumLowbitsLeft); + NewShAmt = + Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA); + A = Builder.CreateShl(A, NewShAmt, CI.getName()); + return BinaryOperator::CreateAShr(A, NewShAmt); } return nullptr; @@ -1575,7 +1575,7 @@ static Type *shrinkFPConstantVector(Value *V) { Type *MinType = nullptr; - unsigned NumElts = cast<FixedVectorType>(CVVTy)->getNumElements(); + unsigned NumElts = cast<FixedVectorType>(CVVTy)->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); if (!CFP) @@ -1656,7 +1656,7 @@ static bool isKnownExactCastIntToFP(CastInst &I) { return false; } -Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) { +Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) { if (Instruction *I = commonCastTransforms(FPT)) return I; @@ -1840,7 +1840,7 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) { return nullptr; } -Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) { +Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) { // If the source operand is a cast from integer to FP and known exact, then // cast the integer operand directly to the destination type. Type *Ty = FPExt.getType(); @@ -1858,7 +1858,7 @@ Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) { /// This is safe if the intermediate type has enough bits in its mantissa to /// accurately represent all values of X. For example, this won't work with /// i64 -> float -> i64. 
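Plugging a concrete constant into the shl/ashr comment above (C = 3, so 32-(8-3) = 27):

    %a = trunc i32 %i to i8
    %b = shl i8 %a, 3
    %c = ashr i8 %b, 3
    %d = sext i8 %c to i32
    ; becomes a sign-extension-in-register done directly in i32
    %b2 = shl i32 %i, 27
    %d2 = ashr i32 %b2, 27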
-Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) { +Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) { if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0))) return nullptr; @@ -1898,29 +1898,29 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) { return replaceInstUsesWith(FI, X); } -Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) { +Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) { if (Instruction *I = foldItoFPtoI(FI)) return I; return commonCastTransforms(FI); } -Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) { +Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) { if (Instruction *I = foldItoFPtoI(FI)) return I; return commonCastTransforms(FI); } -Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) { +Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) { return commonCastTransforms(CI); } -Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) { +Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) { return commonCastTransforms(CI); } -Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) { +Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. @@ -1943,7 +1943,7 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) { } /// Implement the transforms for cast of pointer (bitcast/ptrtoint) -Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) { +Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { @@ -1965,37 +1965,37 @@ Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) { return commonCastTransforms(CI); } -Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { +Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - Value *SrcOp = CI.getPointerOperand(); + Value *SrcOp = CI.getPointerOperand(); Type *Ty = CI.getType(); unsigned AS = CI.getPointerAddressSpace(); - unsigned TySize = Ty->getScalarSizeInBits(); - unsigned PtrSize = DL.getPointerSizeInBits(AS); - if (TySize != PtrSize) { - Type *IntPtrTy = DL.getIntPtrType(CI.getContext(), AS); - // Handle vectors of pointers. 
- if (auto *VecTy = dyn_cast<VectorType>(Ty)) - IntPtrTy = VectorType::get(IntPtrTy, VecTy->getElementCount()); - - Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy); - return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); - } - - Value *Vec, *Scalar, *Index; - if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)), - m_Value(Scalar), m_Value(Index)))) && - Vec->getType() == Ty) { - assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type"); - // Convert the scalar to int followed by insert to eliminate one cast: - // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index - Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType()); - return InsertElementInst::Create(Vec, NewCast, Index); - } - - return commonPointerCastTransforms(CI); + unsigned TySize = Ty->getScalarSizeInBits(); + unsigned PtrSize = DL.getPointerSizeInBits(AS); + if (TySize != PtrSize) { + Type *IntPtrTy = DL.getIntPtrType(CI.getContext(), AS); + // Handle vectors of pointers. + if (auto *VecTy = dyn_cast<VectorType>(Ty)) + IntPtrTy = VectorType::get(IntPtrTy, VecTy->getElementCount()); + + Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy); + return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); + } + + Value *Vec, *Scalar, *Index; + if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)), + m_Value(Scalar), m_Value(Index)))) && + Vec->getType() == Ty) { + assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type"); + // Convert the scalar to int followed by insert to eliminate one cast: + // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index + Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType()); + return InsertElementInst::Create(Vec, NewCast, Index); + } + + return commonPointerCastTransforms(CI); } /// This input value (which is known to have vector type) is being zero extended @@ -2014,9 +2014,9 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction * -optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, - InstCombinerImpl &IC) { +static Instruction * +optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, + InstCombinerImpl &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. @@ -2032,14 +2032,14 @@ optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, return nullptr; SrcTy = - FixedVectorType::get(DestTy->getElementType(), - cast<FixedVectorType>(SrcTy)->getNumElements()); + FixedVectorType::get(DestTy->getElementType(), + cast<FixedVectorType>(SrcTy)->getNumElements()); InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } bool IsBigEndian = IC.getDataLayout().isBigEndian(); - unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements(); - unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements(); + unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements(); + unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements(); assert(SrcElts != DestElts && "Element counts should be different."); @@ -2217,8 +2217,8 @@ static bool collectInsertionElements(Value *V, unsigned Shift, /// /// Into two insertelements that do "buildvector{%inc, %inc5}". 
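The visitPtrToInt handling above keeps the existing canonicalization of mismatched sizes; assuming 64-bit pointers, it looks like:

    ; ptrtoint straight to a narrower integer ...
    %i = ptrtoint i8* %p to i32
    ; ... is split so the pointer-sized cast stays visible to other folds
    %w  = ptrtoint i8* %p to i64
    %i2 = trunc i64 %w to i32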
static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, - InstCombinerImpl &IC) { - auto *DestVecTy = cast<FixedVectorType>(CI.getType()); + InstCombinerImpl &IC) { + auto *DestVecTy = cast<FixedVectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); @@ -2246,7 +2246,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, /// vectors better than bitcasts of scalars because vector registers are /// usually not type-specific like scalar integer or scalar floating-point. static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { // TODO: Create and use a pattern matcher for ExtractElementInst. auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0)); if (!ExtElt || !ExtElt->hasOneUse()) @@ -2258,7 +2258,7 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, if (!VectorType::isValidElementType(DestType)) return nullptr; - auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType()); + auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType()); auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); @@ -2321,10 +2321,10 @@ static Instruction *foldBitCastSelect(BitCastInst &BitCast, // A vector select must maintain the same number of elements in its operands. Type *CondTy = Cond->getType(); Type *DestTy = BitCast.getType(); - if (auto *CondVTy = dyn_cast<VectorType>(CondTy)) - if (!DestTy->isVectorTy() || - CondVTy->getElementCount() != - cast<VectorType>(DestTy)->getElementCount()) + if (auto *CondVTy = dyn_cast<VectorType>(CondTy)) + if (!DestTy->isVectorTy() || + CondVTy->getElementCount() != + cast<VectorType>(DestTy)->getElementCount()) return nullptr; // FIXME: This transform is restricted from changing the select between @@ -2370,8 +2370,8 @@ static bool hasStoreUsersOnly(CastInst &CI) { /// /// All the related PHI nodes can be replaced by new PHI nodes with type A. /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN. -Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI, - PHINode *PN) { +Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI, + PHINode *PN) { // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp. 
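A sketch of the extractelement/bitcast canonicalization handled above, with element types chosen purely for illustration:

    ; bitcast of an extracted element ...
    %e = extractelement <2 x float> %v, i32 0
    %i = bitcast float %e to i32
    ; ... becomes an extract from a bitcast vector, which maps better onto vector registers
    %vi = bitcast <2 x float> %v to <2 x i32>
    %i2 = extractelement <2 x i32> %vi, i32 0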
if (hasStoreUsersOnly(CI)) return nullptr; @@ -2501,7 +2501,7 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI, Instruction *RetVal = nullptr; for (auto *OldPN : OldPhiNodes) { PHINode *NewPN = NewPNodes[OldPN]; - for (User *V : make_early_inc_range(OldPN->users())) { + for (User *V : make_early_inc_range(OldPN->users())) { if (auto *SI = dyn_cast<StoreInst>(V)) { assert(SI->isSimple() && SI->getOperand(0) == OldPN); Builder.SetInsertPoint(SI); @@ -2521,7 +2521,7 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI, if (BCI == &CI) RetVal = I; } else if (auto *PHI = dyn_cast<PHINode>(V)) { - assert(OldPhiNodes.contains(PHI)); + assert(OldPhiNodes.contains(PHI)); (void) PHI; } else { llvm_unreachable("all uses should be handled"); @@ -2532,7 +2532,7 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI, return RetVal; } -Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { +Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. Value *Src = CI.getOperand(0); @@ -2656,11 +2656,11 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { // a bitcast to a vector with the same # elts. Value *ShufOp0 = Shuf->getOperand(0); Value *ShufOp1 = Shuf->getOperand(1); - auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount(); - auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount(); + auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount(); + auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount(); if (Shuf->hasOneUse() && DestTy->isVectorTy() && - cast<VectorType>(DestTy)->getElementCount() == ShufElts && - ShufElts == SrcVecElts) { + cast<VectorType>(DestTy)->getElementCount() == ShufElts && + ShufElts == SrcVecElts) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow @@ -2683,9 +2683,9 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { // TODO: We should match the related pattern for bitreverse. if (DestTy->isIntegerTy() && DL.isLegalInteger(DestTy->getScalarSizeInBits()) && - SrcTy->getScalarSizeInBits() == 8 && - ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() && - Shuf->isReverse()) { + SrcTy->getScalarSizeInBits() == 8 && + ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() && + Shuf->isReverse()) { assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op"); Function *Bswap = @@ -2714,7 +2714,7 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { return commonCastTransforms(CI); } -Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) { +Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) { // If the destination pointer element type is not the same as the source's // first do a bitcast to the destination type, and then the addrspacecast. // This allows the cast to be exposed to other transforms. @@ -2725,9 +2725,9 @@ Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) { Type *DestElemTy = DestTy->getElementType(); if (SrcTy->getElementType() != DestElemTy) { Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace()); - // Handle vectors of pointers. 
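The byte-swap pattern recognized above, sketched for a <4 x i8> source and an i32 destination:

    ; reversing the bytes of a small vector and reinterpreting it as an integer ...
    %rev = shufflevector <4 x i8> %bytes, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %int = bitcast <4 x i8> %rev to i32
    ; ... is recognized as a byte swap of the unreversed value
    %raw  = bitcast <4 x i8> %bytes to i32
    %int2 = call i32 @llvm.bswap.i32(i32 %raw)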
- if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) - MidTy = VectorType::get(MidTy, VT->getElementCount()); + // Handle vectors of pointers. + if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) + MidTy = VectorType::get(MidTy, VT->getElementCount()); Value *NewBitCast = Builder.CreateBitCast(Src, MidTy); return new AddrSpaceCastInst(NewBitCast, CI.getType()); diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCompares.cpp index cd9a036179..50bdd66103 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -24,7 +24,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" using namespace llvm; using namespace PatternMatch; @@ -104,10 +104,10 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { /// /// If AndCst is non-null, then the loaded value is masked with that constant /// before doing the comparison. This handles cases like "A[i]&4 == 0". -Instruction * -InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, - GlobalVariable *GV, CmpInst &ICI, - ConstantInt *AndCst) { +Instruction * +InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, + GlobalVariable *GV, CmpInst &ICI, + ConstantInt *AndCst) { Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) return nullptr; @@ -275,7 +275,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (!GEP->isInBounds()) { Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); - if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize) + if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize) Idx = Builder.CreateTrunc(Idx, IntPtrTy); } @@ -384,7 +384,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, /// /// If we can't emit an optimized form for this expression, this returns null. /// -static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC, +static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC, const DataLayout &DL) { gep_type_iterator GTI = gep_type_begin(GEP); @@ -448,8 +448,8 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC, // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. 
- if (VariableIdx->getType()->getPrimitiveSizeInBits().getFixedSize() > - IntPtrWidth) { + if (VariableIdx->getType()->getPrimitiveSizeInBits().getFixedSize() > + IntPtrWidth) { VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; @@ -502,7 +502,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base, Value *V = WorkList.back(); - if (Explored.contains(V)) { + if (Explored.contains(V)) { WorkList.pop_back(); continue; } @@ -514,7 +514,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base, return false; if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) { - auto *CI = cast<CastInst>(V); + auto *CI = cast<CastInst>(V); if (!CI->isNoopCast(DL)) return false; @@ -804,9 +804,9 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, /// Fold comparisons between a GEP instruction and something else. At this point /// we know that the GEP is on the LHS of the comparison. -Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, - Instruction &I) { +Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, + Instruction &I) { // Don't transform signed compares of GEPs into index compares. Even if the // GEP is inbounds, the final add of the base pointer can have signed overflow // and would change the result of the icmp. @@ -860,8 +860,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // For vectors, we apply the same reasoning on a per-lane basis. auto *Base = GEPLHS->getPointerOperand(); if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) { - auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount(); - Base = Builder.CreateVectorSplat(EC, Base); + auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount(); + Base = Builder.CreateVectorSplat(EC, Base); } return new ICmpInst(Cond, Base, ConstantExpr::getPointerBitCastOrAddrSpaceCast( @@ -904,8 +904,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Type *LHSIndexTy = LOffset->getType(); Type *RHSIndexTy = ROffset->getType(); if (LHSIndexTy != RHSIndexTy) { - if (LHSIndexTy->getPrimitiveSizeInBits().getFixedSize() < - RHSIndexTy->getPrimitiveSizeInBits().getFixedSize()) { + if (LHSIndexTy->getPrimitiveSizeInBits().getFixedSize() < + RHSIndexTy->getPrimitiveSizeInBits().getFixedSize()) { ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); } else LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); @@ -984,9 +984,9 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } -Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI, - const AllocaInst *Alloca, - const Value *Other) { +Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI, + const AllocaInst *Alloca, + const Value *Other) { assert(ICI.isEquality() && "Cannot fold non-equality comparison."); // It would be tempting to fold away comparisons between allocas and any @@ -1062,8 +1062,8 @@ Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI, } /// Fold "icmp pred (X+C), X". -Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C, - ICmpInst::Predicate Pred) { +Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C, + ICmpInst::Predicate Pred) { // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, // so the values can never be equal. Similarly for all other "or equals" // operators. 
@@ -1112,9 +1112,9 @@ Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C, /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" -> /// (icmp eq/ne A, Log2(AP2/AP1)) -> /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)). -Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A, - const APInt &AP1, - const APInt &AP2) { +Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A, + const APInt &AP1, + const APInt &AP2) { assert(I.isEquality() && "Cannot fold icmp gt/lt"); auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { @@ -1171,9 +1171,9 @@ Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A, /// Handle "(icmp eq/ne (shl AP2, A), AP1)" -> /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)). -Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A, - const APInt &AP1, - const APInt &AP2) { +Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A, + const APInt &AP1, + const APInt &AP2) { assert(I.isEquality() && "Cannot fold icmp gt/lt"); auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { @@ -1217,7 +1217,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A, /// static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, ConstantInt *CI2, ConstantInt *CI1, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { // The transformation we're trying to do here is to transform this into an // llvm.sadd.with.overflow. To do this, we have to replace the original add // with a narrower add, and discard the add-with-constant that is part of the @@ -1303,7 +1303,7 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, /// icmp eq/ne (urem/srem %x, %y), 0 /// iff %y is a power-of-two, we can replace this with a bit test: /// icmp eq/ne (and %x, (add %y, -1)), 0 -Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { +Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { // This fold is only valid for equality predicates. if (!I.isEquality()) return nullptr; @@ -1322,7 +1322,7 @@ Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { /// Fold equality-comparison between zero and any (maybe truncated) right-shift /// by one-less-than-bitwidth into a sign test on the original value. -Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) { +Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) { Instruction *Val; ICmpInst::Predicate Pred; if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero()))) @@ -1353,7 +1353,7 @@ Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) { } // Handle icmp pred X, 0 -Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { +Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); if (!match(Cmp.getOperand(1), m_Zero())) return nullptr; @@ -1394,7 +1394,7 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { /// should be moved to some other helper and extended as noted below (it is also /// possible that code has been made unnecessary - do we canonicalize IR to /// overflow/saturating intrinsics or not?). -Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) { +Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) { // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic functions. 
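For foldIRemByPowerOfTwoToBitTest above, a sketch where %y is known to be a power of two (here produced by shifting 1):

    %y   = shl i32 1, %k
    %rem = urem i32 %x, %y
    %cmp = icmp eq i32 %rem, 0
    ; becomes a mask test against %y - 1
    %m    = add i32 %y, -1
    %and  = and i32 %x, %m
    %cmp2 = icmp eq i32 %and, 0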
The source performs an addition // in wider type and explicitly checks for overflow using comparisons against @@ -1440,7 +1440,7 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) { } /// Canonicalize icmp instructions based on dominating conditions. -Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) { +Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) { // This is a cheap/incomplete check for dominance - just match a single // predecessor with a conditional branch. BasicBlock *CmpBB = Cmp.getParent(); @@ -1510,9 +1510,9 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) { } /// Fold icmp (trunc X, Y), C. -Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp, - TruncInst *Trunc, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp, + TruncInst *Trunc, + const APInt &C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); Value *X = Trunc->getOperand(0); if (C.isOneValue() && C.getBitWidth() > 1) { @@ -1543,9 +1543,9 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp, } /// Fold icmp (xor X, Y), C. -Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp, - BinaryOperator *Xor, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp, + BinaryOperator *Xor, + const APInt &C) { Value *X = Xor->getOperand(0); Value *Y = Xor->getOperand(1); const APInt *XorC; @@ -1575,13 +1575,13 @@ Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp, if (Xor->hasOneUse()) { // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) if (!Cmp.isEquality() && XorC->isSignMask()) { - Pred = Cmp.getFlippedSignednessPredicate(); + Pred = Cmp.getFlippedSignednessPredicate(); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); } // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { - Pred = Cmp.getFlippedSignednessPredicate(); + Pred = Cmp.getFlippedSignednessPredicate(); Pred = Cmp.getSwappedPredicate(Pred); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); } @@ -1610,10 +1610,10 @@ Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp, } /// Fold icmp (and (sh X, Y), C2), C1. -Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp, - BinaryOperator *And, - const APInt &C1, - const APInt &C2) { +Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp, + BinaryOperator *And, + const APInt &C1, + const APInt &C2) { BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0)); if (!Shift || !Shift->isShift()) return nullptr; @@ -1696,9 +1696,9 @@ Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp, } /// Fold icmp (and X, C2), C1. -Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp, - BinaryOperator *And, - const APInt &C1) { +Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp, + BinaryOperator *And, + const APInt &C1) { bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE; // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1 @@ -1804,9 +1804,9 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp, } /// Fold icmp (and X, Y), C. 
-Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, - BinaryOperator *And, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, + BinaryOperator *And, + const APInt &C) { if (Instruction *I = foldICmpAndConstConst(Cmp, And, C)) return I; @@ -1846,7 +1846,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); if (auto *AndVTy = dyn_cast<VectorType>(And->getType())) - NTy = VectorType::get(NTy, AndVTy->getElementCount()); + NTy = VectorType::get(NTy, AndVTy->getElementCount()); Value *Trunc = Builder.CreateTrunc(X, NTy); auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; @@ -1858,9 +1858,9 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, } /// Fold icmp (or X, Y), C. -Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp, - BinaryOperator *Or, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp, + BinaryOperator *Or, + const APInt &C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); if (C.isOneValue()) { // icmp slt signum(V) 1 --> icmp slt V, 1 @@ -1924,9 +1924,9 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp, } /// Fold icmp (mul X, Y), C. -Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, - BinaryOperator *Mul, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, + BinaryOperator *Mul, + const APInt &C) { const APInt *MulC; if (!match(Mul->getOperand(1), m_APInt(MulC))) return nullptr; @@ -1941,21 +1941,21 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, Constant::getNullValue(Mul->getType())); } - // If the multiply does not wrap, try to divide the compare constant by the - // multiplication factor. - if (Cmp.isEquality() && !MulC->isNullValue()) { - // (mul nsw X, MulC) == C --> X == C /s MulC - if (Mul->hasNoSignedWrap() && C.srem(*MulC).isNullValue()) { - Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC)); - return new ICmpInst(Pred, Mul->getOperand(0), NewC); - } - // (mul nuw X, MulC) == C --> X == C /u MulC - if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isNullValue()) { - Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC)); - return new ICmpInst(Pred, Mul->getOperand(0), NewC); - } - } - + // If the multiply does not wrap, try to divide the compare constant by the + // multiplication factor. + if (Cmp.isEquality() && !MulC->isNullValue()) { + // (mul nsw X, MulC) == C --> X == C /s MulC + if (Mul->hasNoSignedWrap() && C.srem(*MulC).isNullValue()) { + Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC)); + return new ICmpInst(Pred, Mul->getOperand(0), NewC); + } + // (mul nuw X, MulC) == C --> X == C /u MulC + if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isNullValue()) { + Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC)); + return new ICmpInst(Pred, Mul->getOperand(0), NewC); + } + } + return nullptr; } @@ -2022,9 +2022,9 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, } /// Fold icmp (shl X, Y), C. 
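The equality folds restored in foldICmpMulConstant divide the compare constant by the multiplier when the multiply cannot wrap. A minimal C++ illustration of that identity, assuming a value range small enough that the signed multiply never overflows (mirroring the nsw precondition); the constants 6 and 42 are arbitrary example values.

#include <cassert>

int main() {
  const int MulC = 6;
  const int C = 42; // exactly divisible by MulC, so the fold applies
  for (int x = -1000; x <= 1000; ++x) { // small range: the multiply never overflows
    bool viaMul = (x * MulC) == C;
    bool viaDiv = (x == C / MulC);
    assert(viaMul == viaDiv);
  }
  return 0;
}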
-Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp, - BinaryOperator *Shl, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp, + BinaryOperator *Shl, + const APInt &C) { const APInt *ShiftVal; if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal))) return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal); @@ -2152,7 +2152,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp, DL.isLegalInteger(TypeBits - Amt)) { Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt); if (auto *ShVTy = dyn_cast<VectorType>(ShType)) - TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount()); + TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount()); Constant *NewC = ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt)); return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); @@ -2162,9 +2162,9 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp, } /// Fold icmp ({al}shr X, Y), C. -Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, - BinaryOperator *Shr, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, + BinaryOperator *Shr, + const APInt &C) { // An exact shr only shifts out zero bits, so: // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 Value *X = Shr->getOperand(0); @@ -2210,21 +2210,21 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); } - - // If the compare constant has significant bits above the lowest sign-bit, - // then convert an unsigned cmp to a test of the sign-bit: - // (ashr X, ShiftC) u> C --> X s< 0 - // (ashr X, ShiftC) u< C --> X s> -1 - if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) { - if (Pred == CmpInst::ICMP_UGT) { - return new ICmpInst(CmpInst::ICMP_SLT, X, - ConstantInt::getNullValue(ShrTy)); - } - if (Pred == CmpInst::ICMP_ULT) { - return new ICmpInst(CmpInst::ICMP_SGT, X, - ConstantInt::getAllOnesValue(ShrTy)); - } - } + + // If the compare constant has significant bits above the lowest sign-bit, + // then convert an unsigned cmp to a test of the sign-bit: + // (ashr X, ShiftC) u> C --> X s< 0 + // (ashr X, ShiftC) u< C --> X s> -1 + if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) { + if (Pred == CmpInst::ICMP_UGT) { + return new ICmpInst(CmpInst::ICMP_SLT, X, + ConstantInt::getNullValue(ShrTy)); + } + if (Pred == CmpInst::ICMP_ULT) { + return new ICmpInst(CmpInst::ICMP_SGT, X, + ConstantInt::getAllOnesValue(ShrTy)); + } + } } else { if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) @@ -2270,9 +2270,9 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, return nullptr; } -Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp, - BinaryOperator *SRem, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp, + BinaryOperator *SRem, + const APInt &C) { // Match an 'is positive' or 'is negative' comparison of remainder by a // constant power-of-2 value: // (X % pow2C) sgt/slt 0 @@ -2309,9 +2309,9 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp, } /// Fold icmp (udiv X, Y), C. 
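The added branch in foldICmpShrConstant turns an unsigned comparison of an arithmetic-shift result into a plain sign test. A standalone C++ sketch of why that works; the shift amount and compare constant are arbitrary, and the right shift of a negative value is the arithmetic shift that ashr models (guaranteed two's-complement behaviour since C++20).

#include <cassert>
#include <cstdint>

int main() {
  const int ShAmt = 24;  // large shift: the compare constant fits below it
  const uint32_t C = 1000;
  const int32_t samples[] = {INT32_MIN, -123456, -1, 0, 1, 123456, INT32_MAX};
  for (int32_t x : samples) {
    bool viaShiftCmp = static_cast<uint32_t>(x >> ShAmt) > C; // (ashr X, 24) u> C
    bool viaSignTest = x < 0;                                 // X s< 0
    assert(viaShiftCmp == viaSignTest);
  }
  return 0;
}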
-Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, - BinaryOperator *UDiv, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, + BinaryOperator *UDiv, + const APInt &C) { const APInt *C2; if (!match(UDiv->getOperand(0), m_APInt(C2))) return nullptr; @@ -2338,9 +2338,9 @@ Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, } /// Fold icmp ({su}div X, Y), C. -Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, - BinaryOperator *Div, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, + BinaryOperator *Div, + const APInt &C) { // Fold: icmp pred ([us]div X, C2), C -> range test // Fold this div into the comparison, producing a range check. // Determine, based on the divide type, what the range is being @@ -2508,9 +2508,9 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, } /// Fold icmp (sub X, Y), C. -Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp, - BinaryOperator *Sub, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp, + BinaryOperator *Sub, + const APInt &C) { Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1); ICmpInst::Predicate Pred = Cmp.getPredicate(); const APInt *C2; @@ -2570,9 +2570,9 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp, } /// Fold icmp (add X, Y), C. -Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, - BinaryOperator *Add, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, + BinaryOperator *Add, + const APInt &C) { Value *Y = Add->getOperand(1); const APInt *C2; if (Cmp.isEquality() || !match(Y, m_APInt(C2))) @@ -2636,10 +2636,10 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, return nullptr; } -bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, - Value *&RHS, ConstantInt *&Less, - ConstantInt *&Equal, - ConstantInt *&Greater) { +bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, + Value *&RHS, ConstantInt *&Less, + ConstantInt *&Equal, + ConstantInt *&Greater) { // TODO: Generalize this to work with other comparison idioms or ensure // they get canonicalized into this form. 
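foldICmpDivConstant rewrites a compare of a division result into a range check on the dividend. A minimal C++ illustration of the unsigned case, not from the patch; the divisor 10 and quotient 3 are arbitrary example values.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 200; ++x) {
    bool viaDiv   = (x / 10u) == 3u;        // icmp eq (udiv X, 10), 3
    bool viaRange = (x >= 30u && x <= 39u); // the equivalent range check on X
    assert(viaDiv == viaRange);
  }
  return 0;
}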
@@ -2676,8 +2676,8 @@ bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) { // x sgt C-1 <--> x sge C <--> not(x slt C) auto FlippedStrictness = - InstCombiner::getFlippedStrictnessPredicateAndConstant( - PredB, cast<Constant>(RHS2)); + InstCombiner::getFlippedStrictnessPredicateAndConstant( + PredB, cast<Constant>(RHS2)); if (!FlippedStrictness) return false; assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check"); @@ -2689,9 +2689,9 @@ bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, return PredB == ICmpInst::ICMP_SLT && RHS == RHS2; } -Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp, - SelectInst *Select, - ConstantInt *C) { +Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp, + SelectInst *Select, + ConstantInt *C) { assert(C && "Cmp RHS should be a constant int!"); // If we're testing a constant value against the result of a three way @@ -2789,7 +2789,7 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp, const APInt *C; bool TrueIfSigned; if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() && - InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) { + InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) { if (match(BCSrcOp, m_FPExt(m_Value(X))) || match(BCSrcOp, m_FPTrunc(m_Value(X)))) { // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0 @@ -2801,7 +2801,7 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp, Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits()); if (auto *XVTy = dyn_cast<VectorType>(XType)) - NewType = VectorType::get(NewType, XVTy->getElementCount()); + NewType = VectorType::get(NewType, XVTy->getElementCount()); Value *NewBitcast = Builder.CreateBitCast(X, NewType); if (TrueIfSigned) return new ICmpInst(ICmpInst::ICMP_SLT, NewBitcast, @@ -2865,7 +2865,7 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp, /// Try to fold integer comparisons with a constant operand: icmp Pred X, C /// where X is some kind of instruction. -Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) { +Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) { const APInt *C; if (!match(Cmp.getOperand(1), m_APInt(C))) return nullptr; @@ -2950,8 +2950,8 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) { /// Fold an icmp equality instruction with binary operator LHS and constant RHS: /// icmp eq/ne BO, C. -Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant( - ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) { +Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant( + ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) { // TODO: Some of these folds could work with arbitrary constants, but this // function is limited to scalar and vector splat constants. if (!Cmp.isEquality()) @@ -3055,19 +3055,19 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant( } /// Fold an equality icmp with LLVM intrinsic and constant operand. 
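The sign-bit branch of foldICmpBitCast relies on two facts: the integer sign bit of an IEEE value is exactly its floating-point sign, and fpext/fptrunc preserve that sign. A small C++ check of both, where the helper name and sample values are invented for the example.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static bool signBitViaBitcast(float f) {
  int32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // "bitcast float to i32"
  return bits < 0;                      // the icmp slt ..., 0 sign-bit check
}

int main() {
  const float samples[] = {-3.5f, -0.0f, 0.0f, 2.25f, -1e30f, 1e-30f};
  for (float f : samples) {
    assert(signBitViaBitcast(f) == std::signbit(f));
    // fpext keeps the sign, so the same test on the widened value agrees.
    assert(std::signbit(static_cast<double>(f)) == std::signbit(f));
  }
  return 0;
}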
-Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( - ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) { +Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( + ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) { Type *Ty = II->getType(); unsigned BitWidth = C.getBitWidth(); switch (II->getIntrinsicID()) { - case Intrinsic::abs: - // abs(A) == 0 -> A == 0 - // abs(A) == INT_MIN -> A == INT_MIN - if (C.isNullValue() || C.isMinSignedValue()) - return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), - ConstantInt::get(Ty, C)); - break; - + case Intrinsic::abs: + // abs(A) == 0 -> A == 0 + // abs(A) == INT_MIN -> A == INT_MIN + if (C.isNullValue() || C.isMinSignedValue()) + return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), + ConstantInt::get(Ty, C)); + break; + case Intrinsic::bswap: // bswap(A) == C -> A == bswap(C) return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), @@ -3135,31 +3135,31 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( } /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C. -Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, - IntrinsicInst *II, - const APInt &C) { +Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, + IntrinsicInst *II, + const APInt &C) { if (Cmp.isEquality()) return foldICmpEqIntrinsicWithConstant(Cmp, II, C); Type *Ty = II->getType(); unsigned BitWidth = C.getBitWidth(); - ICmpInst::Predicate Pred = Cmp.getPredicate(); + ICmpInst::Predicate Pred = Cmp.getPredicate(); switch (II->getIntrinsicID()) { - case Intrinsic::ctpop: { - // (ctpop X > BitWidth - 1) --> X == -1 - Value *X = II->getArgOperand(0); - if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT) - return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X, - ConstantInt::getAllOnesValue(Ty)); - // (ctpop X < BitWidth) --> X != -1 - if (C == BitWidth && Pred == ICmpInst::ICMP_ULT) - return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X, - ConstantInt::getAllOnesValue(Ty)); - break; - } + case Intrinsic::ctpop: { + // (ctpop X > BitWidth - 1) --> X == -1 + Value *X = II->getArgOperand(0); + if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT) + return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X, + ConstantInt::getAllOnesValue(Ty)); + // (ctpop X < BitWidth) --> X != -1 + if (C == BitWidth && Pred == ICmpInst::ICMP_ULT) + return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X, + ConstantInt::getAllOnesValue(Ty)); + break; + } case Intrinsic::ctlz: { // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000 - if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { + if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { unsigned Num = C.getLimitedValue(); APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, @@ -3167,7 +3167,7 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, } // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111 - if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) { + if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) { unsigned Num = C.getLimitedValue(); APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num); return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT, @@ -3181,7 +3181,7 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, return nullptr; // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0 - if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { + 
if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1); return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, Builder.CreateAnd(II->getArgOperand(0), Mask), @@ -3189,7 +3189,7 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, } // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0 - if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) { + if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) { APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue()); return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, Builder.CreateAnd(II->getArgOperand(0), Mask), @@ -3205,7 +3205,7 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, } /// Handle icmp with constant (but not simple integer constant) RHS. -Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) { +Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Constant *RHSC = dyn_cast<Constant>(Op1); Instruction *LHSI = dyn_cast<Instruction>(Op0); @@ -3384,8 +3384,8 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I, // those elements by copying an existing, defined, and safe scalar constant. Type *OpTy = M->getType(); auto *VecC = dyn_cast<Constant>(M); - auto *OpVTy = dyn_cast<FixedVectorType>(OpTy); - if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) { + auto *OpVTy = dyn_cast<FixedVectorType>(OpTy); + if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) { Constant *SafeReplacementConstant = nullptr; for (unsigned i = 0, e = OpVTy->getNumElements(); i != e; ++i) { if (!isa<UndefValue>(VecC->getAggregateElement(i))) { @@ -3651,7 +3651,7 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, /// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit /// Note that the comparison is commutative, while inverted (u>=, ==) predicate /// will mean that we are looking for the opposite answer. -Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) { +Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) { ICmpInst::Predicate Pred; Value *X, *Y; Instruction *Mul; @@ -3713,28 +3713,28 @@ Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) { return Res; } -static Instruction *foldICmpXNegX(ICmpInst &I) { - CmpInst::Predicate Pred; - Value *X; - if (!match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) - return nullptr; - - if (ICmpInst::isSigned(Pred)) - Pred = ICmpInst::getSwappedPredicate(Pred); - else if (ICmpInst::isUnsigned(Pred)) - Pred = ICmpInst::getSignedPredicate(Pred); - // else for equality-comparisons just keep the predicate. - - return ICmpInst::Create(Instruction::ICmp, Pred, X, - Constant::getNullValue(X->getType()), I.getName()); -} - +static Instruction *foldICmpXNegX(ICmpInst &I) { + CmpInst::Predicate Pred; + Value *X; + if (!match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) + return nullptr; + + if (ICmpInst::isSigned(Pred)) + Pred = ICmpInst::getSwappedPredicate(Pred); + else if (ICmpInst::isUnsigned(Pred)) + Pred = ICmpInst::getSignedPredicate(Pred); + // else for equality-comparisons just keep the predicate. + + return ICmpInst::Create(Instruction::ICmp, Pred, X, + Constant::getNullValue(X->getType()), I.getName()); +} + /// Try to fold icmp (binop), X or icmp X, (binop). 
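The new foldICmpXNegX above reduces a comparison between a value and its non-overflowing negation to a comparison against zero. A standalone C++ sketch of the signed and unsigned variants; the samples deliberately avoid INT32_MIN, matching the nsw precondition on the negation.

#include <cassert>
#include <cstdint>

int main() {
  const int32_t samples[] = {-100, -1, 0, 1, 100}; // no INT32_MIN: negation must not overflow
  for (int32_t x : samples) {
    int32_t negx = -x;
    // Signed predicate: swapped and compared against zero.
    assert((negx < x) == (x > 0));
    // Unsigned predicate: becomes the corresponding signed test against zero.
    assert((static_cast<uint32_t>(negx) < static_cast<uint32_t>(x)) == (x < 0));
  }
  return 0;
}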
/// TODO: A large part of this logic is duplicated in InstSimplify's /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code /// duplication. -Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, - const SimplifyQuery &SQ) { +Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, + const SimplifyQuery &SQ) { const SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -3744,9 +3744,9 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, if (!BO0 && !BO1) return nullptr; - if (Instruction *NewICmp = foldICmpXNegX(I)) - return NewICmp; - + if (Instruction *NewICmp = foldICmpXNegX(I)) + return NewICmp; + const CmpInst::Predicate Pred = I.getPredicate(); Value *X; @@ -3967,19 +3967,19 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, ConstantExpr::getNeg(RHSC)); } - { - // Try to remove shared constant multiplier from equality comparison: - // X * C == Y * C (with no overflowing/aliasing) --> X == Y - Value *X, *Y; - const APInt *C; - if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 && - match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality()) - if (!C->countTrailingZeros() || - (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) || - (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap())) - return new ICmpInst(Pred, X, Y); - } - + { + // Try to remove shared constant multiplier from equality comparison: + // X * C == Y * C (with no overflowing/aliasing) --> X == Y + Value *X, *Y; + const APInt *C; + if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 && + match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality()) + if (!C->countTrailingZeros() || + (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) || + (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap())) + return new ICmpInst(Pred, X, Y); + } + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -4024,13 +4024,13 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, if (match(BO0->getOperand(1), m_APInt(C))) { // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b if (C->isSignMask()) { - ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate(); + ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate(); return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); } // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) { - ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate(); + ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate(); NewPred = I.getSwappedPredicate(NewPred); return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); } @@ -4198,7 +4198,7 @@ static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) { return nullptr; } -Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { +Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { if (!I.isEquality()) return nullptr; @@ -4466,7 +4466,7 @@ static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp, } /// Handle icmp (cast x), (cast or constant). 
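One of the blocks restored in foldICmpBinOp strips a shared constant multiplier from an equality compare; when the multiplier is odd it is invertible modulo 2^32, so the fold holds even with wraparound. A minimal C++ check of that case, with an arbitrarily chosen odd constant and samples.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0x9E3779B9u; // odd, hence invertible modulo 2^32
  const uint32_t samples[] = {0u, 1u, 2u, 0x12345678u, 0xFFFFFFFFu};
  for (uint32_t x : samples)
    for (uint32_t y : samples)
      assert(((x * C) == (y * C)) == (x == y)); // wrapping multiply, still injective
  return 0;
}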
-Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) { +Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) { auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0)); if (!CastOp0) return nullptr; @@ -4521,10 +4521,10 @@ static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) { } } -OverflowResult -InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp, - bool IsSigned, Value *LHS, Value *RHS, - Instruction *CxtI) const { +OverflowResult +InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp, + bool IsSigned, Value *LHS, Value *RHS, + Instruction *CxtI) const { switch (BinaryOp) { default: llvm_unreachable("Unsupported binary op"); @@ -4546,11 +4546,11 @@ InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp, } } -bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, - bool IsSigned, Value *LHS, - Value *RHS, Instruction &OrigI, - Value *&Result, - Constant *&Overflow) { +bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, + bool IsSigned, Value *LHS, + Value *RHS, Instruction &OrigI, + Value *&Result, + Constant *&Overflow) { if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS)) std::swap(LHS, RHS); @@ -4560,13 +4560,13 @@ bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, // compare. Builder.SetInsertPoint(&OrigI); - Type *OverflowTy = Type::getInt1Ty(LHS->getContext()); - if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType())) - OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount()); - + Type *OverflowTy = Type::getInt1Ty(LHS->getContext()); + if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType())) + OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount()); + if (isNeutralValue(BinaryOp, RHS)) { Result = LHS; - Overflow = ConstantInt::getFalse(OverflowTy); + Overflow = ConstantInt::getFalse(OverflowTy); return true; } @@ -4577,12 +4577,12 @@ bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, case OverflowResult::AlwaysOverflowsHigh: Result = Builder.CreateBinOp(BinaryOp, LHS, RHS); Result->takeName(&OrigI); - Overflow = ConstantInt::getTrue(OverflowTy); + Overflow = ConstantInt::getTrue(OverflowTy); return true; case OverflowResult::NeverOverflows: Result = Builder.CreateBinOp(BinaryOp, LHS, RHS); Result->takeName(&OrigI); - Overflow = ConstantInt::getFalse(OverflowTy); + Overflow = ConstantInt::getFalse(OverflowTy); if (auto *Inst = dyn_cast<Instruction>(Result)) { if (IsSigned) Inst->setHasNoSignedWrap(); @@ -4610,8 +4610,8 @@ bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, /// \returns Instruction which must replace the compare instruction, NULL if no /// replacement required. static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, - Value *OtherVal, - InstCombinerImpl &IC) { + Value *OtherVal, + InstCombinerImpl &IC) { // Don't bother doing this transformation for pointers, don't do it for // vectors. if (!isa<IntegerType>(MulVal->getType())) @@ -4759,14 +4759,14 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, Function *F = Intrinsic::getDeclaration( I.getModule(), Intrinsic::umul_with_overflow, MulType); CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul"); - IC.addToWorklist(MulInstr); + IC.addToWorklist(MulInstr); // If there are uses of mul result other than the comparison, we know that // they are truncation or binary AND. 
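processUMulZExtIdiom recognizes the source-level idiom of widening two 32-bit operands, multiplying in 64 bits, and comparing the product against the 32-bit range, which is exactly an unsigned multiply overflow check. A self-contained C++ sketch of that equivalence; the helper names are invented, and the reference side uses the usual divide-back test.

#include <cassert>
#include <cstdint>

static bool overflowsViaWidening(uint32_t a, uint32_t b) {
  uint64_t wide = static_cast<uint64_t>(a) * b; // zext both operands, multiply wide
  return wide > UINT32_MAX;                     // compare against the 32-bit range
}

static bool overflowsReference(uint32_t a, uint32_t b) {
  uint32_t narrow = a * b;          // wrapping 32-bit product
  return b != 0 && narrow / b != a; // divide-back overflow check
}

int main() {
  const uint32_t samples[] = {0u, 1u, 2u, 65535u, 65536u, 65537u, 0xFFFFFFFFu};
  for (uint32_t a : samples)
    for (uint32_t b : samples)
      assert(overflowsViaWidening(a, b) == overflowsReference(a, b));
  return 0;
}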
Change them to use result of // mul.with.overflow and adjust properly mask/size. if (MulVal->hasNUsesOrMore(2)) { Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value"); - for (User *U : make_early_inc_range(MulVal->users())) { + for (User *U : make_early_inc_range(MulVal->users())) { if (U == &I || U == OtherVal) continue; if (TruncInst *TI = dyn_cast<TruncInst>(U)) { @@ -4785,11 +4785,11 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, } else { llvm_unreachable("Unexpected Binary operation"); } - IC.addToWorklist(cast<Instruction>(U)); + IC.addToWorklist(cast<Instruction>(U)); } } if (isa<Instruction>(OtherVal)) - IC.addToWorklist(cast<Instruction>(OtherVal)); + IC.addToWorklist(cast<Instruction>(OtherVal)); // The original icmp gets replaced with the overflow value, maybe inverted // depending on predicate. @@ -4834,7 +4834,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) { // If this is a normal comparison, it demands all bits. If it is a sign bit // comparison, it only demands the sign bit. bool UnusedBit; - if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit)) + if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit)) return APInt::getSignMask(BitWidth); switch (I.getPredicate()) { @@ -4891,9 +4891,9 @@ static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) { /// \return true when \p UI is the only use of \p DI in the parent block /// and all other uses of \p DI are in blocks dominated by \p DB. /// -bool InstCombinerImpl::dominatesAllUses(const Instruction *DI, - const Instruction *UI, - const BasicBlock *DB) const { +bool InstCombinerImpl::dominatesAllUses(const Instruction *DI, + const Instruction *UI, + const BasicBlock *DB) const { assert(DI && UI && "Instruction not defined\n"); // Ignore incomplete definitions. if (!DI->getParent()) @@ -4966,9 +4966,9 @@ static bool isChainSelectCmpBranch(const SelectInst *SI) { /// major restriction since a NE compare should be 'normalized' to an equal /// compare, which usually happens in the combiner and test case /// select-cmp-br.ll checks for it. -bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI, - const ICmpInst *Icmp, - const unsigned SIOpd) { +bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI, + const ICmpInst *Icmp, + const unsigned SIOpd) { assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!"); if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) { BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1); @@ -4994,7 +4994,7 @@ bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI, /// Try to fold the comparison based on range information we can get by checking /// whether bits are known to be zero or one in the inputs. 
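getDemandedBitsLHSMask returns only the sign mask for a sign-bit comparison, because a test such as x < 0 cannot be affected by any lower bit. A tiny C++ check of that observation on arbitrary sample values, assuming two's-complement conversion between the signed and unsigned views.

#include <cassert>
#include <cstdint>

int main() {
  const int32_t samples[] = {INT32_MIN, -7, -1, 0, 5, INT32_MAX};
  for (int32_t x : samples)
    for (unsigned bit = 0; bit < 31; ++bit) { // every bit below the sign bit
      int32_t flipped =
          static_cast<int32_t>(static_cast<uint32_t>(x) ^ (1u << bit));
      assert((x < 0) == (flipped < 0)); // the sign test never changes
    }
  return 0;
}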
-Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { +Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = Op0->getType(); ICmpInst::Predicate Pred = I.getPredicate(); @@ -5025,15 +5025,15 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); if (I.isSigned()) { - Op0Min = Op0Known.getSignedMinValue(); - Op0Max = Op0Known.getSignedMaxValue(); - Op1Min = Op1Known.getSignedMinValue(); - Op1Max = Op1Known.getSignedMaxValue(); + Op0Min = Op0Known.getSignedMinValue(); + Op0Max = Op0Known.getSignedMaxValue(); + Op1Min = Op1Known.getSignedMinValue(); + Op1Max = Op1Known.getSignedMaxValue(); } else { - Op0Min = Op0Known.getMinValue(); - Op0Max = Op0Known.getMaxValue(); - Op1Min = Op1Known.getMinValue(); - Op1Max = Op1Known.getMaxValue(); + Op0Min = Op0Known.getMinValue(); + Op0Max = Op0Known.getMaxValue(); + Op1Min = Op1Known.getMinValue(); + Op1Max = Op1Known.getMaxValue(); } // If Min and Max are known to be the same, then SimplifyDemandedBits figured @@ -5051,9 +5051,9 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: { - if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return replaceInstUsesWith( - I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE)); + if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) + return replaceInstUsesWith( + I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE)); // If all bits are known zero except for one, then we know at most one bit // is set. If the comparison is against zero, then this is a check to see if @@ -5223,8 +5223,8 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { } llvm::Optional<std::pair<CmpInst::Predicate, Constant *>> -InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, - Constant *C) { +InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, + Constant *C) { assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && "Only for relational integer predicates."); @@ -5246,8 +5246,8 @@ InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, // Bail out if the constant can't be safely incremented/decremented. if (!ConstantIsOk(CI)) return llvm::None; - } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) { - unsigned NumElts = FVTy->getNumElements(); + } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) { + unsigned NumElts = FVTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = C->getAggregateElement(i); if (!Elt) @@ -5273,8 +5273,8 @@ InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, // It may not be safe to change a compare predicate in the presence of // undefined elements, so replace those elements with the first safe constant // that we found. - // TODO: in case of poison, it is safe; let's replace undefs only. - if (C->containsUndefOrPoisonElement()) { + // TODO: in case of poison, it is safe; let's replace undefs only. 
+ if (C->containsUndefOrPoisonElement()) { assert(SafeReplacementConstant && "Replacement constant not set"); C = Constant::replaceUndefsWith(C, SafeReplacementConstant); } @@ -5294,7 +5294,7 @@ InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { ICmpInst::Predicate Pred = I.getPredicate(); if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) || - InstCombiner::isCanonicalPredicate(Pred)) + InstCombiner::isCanonicalPredicate(Pred)) return nullptr; Value *Op0 = I.getOperand(0); @@ -5303,8 +5303,8 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { if (!Op1C) return nullptr; - auto FlippedStrictness = - InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C); + auto FlippedStrictness = + InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C); if (!FlippedStrictness) return nullptr; @@ -5313,14 +5313,14 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { /// If we have a comparison with a non-canonical predicate, if we can update /// all the users, invert the predicate and adjust all the users. -CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) { +CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) { // Is the predicate already canonical? CmpInst::Predicate Pred = I.getPredicate(); - if (InstCombiner::isCanonicalPredicate(Pred)) + if (InstCombiner::isCanonicalPredicate(Pred)) return nullptr; // Can all users be adjusted to predicate inversion? - if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr)) + if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr)) return nullptr; // Ok, we can canonicalize comparison! @@ -5328,8 +5328,8 @@ CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) { I.setPredicate(CmpInst::getInversePredicate(Pred)); I.setName(I.getName() + ".not"); - // And, adapt users. - freelyInvertAllUsersOf(&I); + // And, adapt users. + freelyInvertAllUsersOf(&I); return &I; } @@ -5531,7 +5531,7 @@ static Instruction *foldICmpOfUAddOv(ICmpInst &I) { return ExtractValueInst::Create(UAddOv, 1); } -Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { +Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; const SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -5655,10 +5655,10 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { // Try to optimize equality comparisons against alloca-based pointers. if (Op0->getType()->isPointerTy() && I.isEquality()) { assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?"); - if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0))) + if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0))) if (Instruction *New = foldAllocaCmp(I, Alloca, Op1)) return New; - if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1))) + if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1))) if (Instruction *New = foldAllocaCmp(I, Alloca, Op0)) return New; } @@ -5769,9 +5769,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { } /// Fold fcmp ([us]itofp x, cst) if possible. 
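Both getFlippedStrictnessPredicateAndConstant and canonicalizeCmpWithConstant lean on the strict/non-strict flip: adjusting the constant by one converts a strict predicate into its non-strict counterpart as long as the adjustment does not wrap. A minimal C++ illustration; the constant 42 is an arbitrary example, and only the constant (not the compared value) must avoid the wrapping endpoint.

#include <cassert>
#include <cstdint>

int main() {
  const int32_t C = 42; // any constant other than INT32_MIN works here
  const int32_t samples[] = {INT32_MIN, 0, 40, 41, 42, 43, INT32_MAX};
  for (int32_t x : samples) {
    assert((x > C - 1) == (x >= C)); // strict vs. non-strict with adjusted constant
    assert((x >= C) == !(x < C));    // and the inverted-predicate view
  }
  return 0;
}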
-Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, - Instruction *LHSI, - Constant *RHSC) { +Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, + Instruction *LHSI, + Constant *RHSC) { if (!isa<ConstantFP>(RHSC)) return nullptr; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); @@ -6056,9 +6056,9 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI, } /// Optimize fabs(X) compared with zero. -static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { +static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { Value *X; - if (!match(I.getOperand(0), m_FAbs(m_Value(X))) || + if (!match(I.getOperand(0), m_FAbs(m_Value(X))) || !match(I.getOperand(1), m_PosZeroFP())) return nullptr; @@ -6118,7 +6118,7 @@ static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { } } -Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) { +Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) { bool Changed = false; /// Orders the operands of the compare so that they are listed from most diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineInternal.h index 79e9d5c46c..04ae8f0b19 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineInternal.h @@ -25,7 +25,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" #include <cassert> @@ -33,14 +33,14 @@ using namespace llvm::PatternMatch; -// As a default, let's assume that we want to be aggressive, -// and attempt to traverse with no limits in attempt to sink negation. -static constexpr unsigned NegatorDefaultMaxDepth = ~0U; - -// Let's guesstimate that most often we will end up visiting/producing -// fairly small number of new instructions. -static constexpr unsigned NegatorMaxNodesSSO = 16; - +// As a default, let's assume that we want to be aggressive, +// and attempt to traverse with no limits in attempt to sink negation. +static constexpr unsigned NegatorDefaultMaxDepth = ~0U; + +// Let's guesstimate that most often we will end up visiting/producing +// fairly small number of new instructions. 
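The fcmp hunks above include foldFabsWithFcmpZero, which folds comparisons of fabs(X) against zero. A small standalone C++ check of two of the facts it uses, covering the NaN and negative-zero corner cases; the sample set is arbitrary and the snippet is illustrative only.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double samples[] = {-3.5, -0.0, 0.0, 2.25,
                            std::numeric_limits<double>::infinity(),
                            std::numeric_limits<double>::quiet_NaN()};
  for (double x : samples) {
    assert(!(std::fabs(x) < 0.0));               // fabs(x) olt +0.0 can never hold
    assert((std::fabs(x) == 0.0) == (x == 0.0)); // oeq against +0.0 sees through fabs
  }
  return 0;
}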
+static constexpr unsigned NegatorMaxNodesSSO = 16; + namespace llvm { class AAResults; @@ -57,20 +57,20 @@ class ProfileSummaryInfo; class TargetLibraryInfo; class User; -class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final - : public InstCombiner, - public InstVisitor<InstCombinerImpl, Instruction *> { +class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final + : public InstCombiner, + public InstVisitor<InstCombinerImpl, Instruction *> { public: - InstCombinerImpl(InstCombineWorklist &Worklist, BuilderTy &Builder, - bool MinimizeSize, AAResults *AA, AssumptionCache &AC, - TargetLibraryInfo &TLI, TargetTransformInfo &TTI, - DominatorTree &DT, OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - const DataLayout &DL, LoopInfo *LI) - : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, - BFI, PSI, DL, LI) {} + InstCombinerImpl(InstCombineWorklist &Worklist, BuilderTy &Builder, + bool MinimizeSize, AAResults *AA, AssumptionCache &AC, + TargetLibraryInfo &TLI, TargetTransformInfo &TTI, + DominatorTree &DT, OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + const DataLayout &DL, LoopInfo *LI) + : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, + BFI, PSI, DL, LI) {} - virtual ~InstCombinerImpl() {} + virtual ~InstCombinerImpl() {} /// Run the combiner over the entire worklist until it is empty. /// @@ -105,7 +105,7 @@ public: Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted); Instruction *visitAnd(BinaryOperator &I); Instruction *visitOr(BinaryOperator &I); - bool sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I); + bool sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); Value *reassociateShiftAmtsOfTwoSameDirectionShifts( @@ -119,7 +119,7 @@ public: Instruction *visitLShr(BinaryOperator &I); Instruction *commonShiftTransforms(BinaryOperator &I); Instruction *visitFCmpInst(FCmpInst &I); - CmpInst *canonicalizeICmpPredicate(CmpInst &I); + CmpInst *canonicalizeICmpPredicate(CmpInst &I); Instruction *visitICmpInst(ICmpInst &I); Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); @@ -158,9 +158,9 @@ public: Instruction *visitFenceInst(FenceInst &FI); Instruction *visitSwitchInst(SwitchInst &SI); Instruction *visitReturnInst(ReturnInst &RI); - Instruction *visitUnreachableInst(UnreachableInst &I); - Instruction * - foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI); + Instruction *visitUnreachableInst(UnreachableInst &I); + Instruction * + foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI); Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); @@ -320,12 +320,12 @@ private: Instruction *narrowBinOp(TruncInst &Trunc); Instruction *narrowMaskedBinOp(BinaryOperator &And); Instruction *narrowMathIfNoOverflow(BinaryOperator &I); - Instruction *narrowFunnelShift(TruncInst &Trunc); + Instruction *narrowFunnelShift(TruncInst &Trunc); Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN); Instruction *matchSAddSubSat(SelectInst &MinMax1); - void freelyInvertAllUsersOf(Value *V); - + void freelyInvertAllUsersOf(Value *V); + /// Determine if a pair of casts can be replaced by a single cast. /// /// \param CI1 The first of a pair of casts. 
@@ -398,7 +398,7 @@ public: << " with " << *V << '\n'); I.replaceAllUsesWith(V); - MadeIRChange = true; + MadeIRChange = true; return &I; } @@ -440,7 +440,7 @@ public: /// When dealing with an instruction that has side effects or produces a void /// value, we can't rely on DCE to delete the instruction. Instead, visit /// methods should return the value returned by this function. - Instruction *eraseInstFromFunction(Instruction &I) override { + Instruction *eraseInstFromFunction(Instruction &I) override { LLVM_DEBUG(dbgs() << "IC: ERASE " << I << '\n'); assert(I.use_empty() && "Cannot erase instruction that is used!"); salvageDebugInfo(I); @@ -567,7 +567,7 @@ public: unsigned Depth, Instruction *CxtI); bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, - unsigned Depth = 0) override; + unsigned Depth = 0) override; /// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne /// bits. It also tries to handle simplifications that can be done based on @@ -587,10 +587,10 @@ public: /// demanded bits. bool SimplifyDemandedInstructionBits(Instruction &Inst); - virtual Value * - SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, - unsigned Depth = 0, - bool AllowMultipleUsers = false) override; + virtual Value * + SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, + unsigned Depth = 0, + bool AllowMultipleUsers = false) override; /// Canonicalize the position of binops relative to shufflevector. Instruction *foldVectorBinop(BinaryOperator &Inst); @@ -614,18 +614,18 @@ public: /// Try to rotate an operation below a PHI node, using PHI nodes for /// its operands. - Instruction *foldPHIArgOpIntoPHI(PHINode &PN); - Instruction *foldPHIArgBinOpIntoPHI(PHINode &PN); - Instruction *foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN); - Instruction *foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN); - Instruction *foldPHIArgGEPIntoPHI(PHINode &PN); - Instruction *foldPHIArgLoadIntoPHI(PHINode &PN); - Instruction *foldPHIArgZextsIntoPHI(PHINode &PN); + Instruction *foldPHIArgOpIntoPHI(PHINode &PN); + Instruction *foldPHIArgBinOpIntoPHI(PHINode &PN); + Instruction *foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN); + Instruction *foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN); + Instruction *foldPHIArgGEPIntoPHI(PHINode &PN); + Instruction *foldPHIArgLoadIntoPHI(PHINode &PN); + Instruction *foldPHIArgZextsIntoPHI(PHINode &PN); /// If an integer typed PHI has only one use which is an IntToPtr operation, /// replace the PHI with an existing pointer typed PHI if it exists. Otherwise /// insert a new pointer typed PHI and replace the original one. - Instruction *foldIntegerTypedPHI(PHINode &PN); + Instruction *foldIntegerTypedPHI(PHINode &PN); /// Helper function for FoldPHIArgXIntoPHI() to set debug location for the /// folded operation. @@ -708,18 +708,18 @@ public: Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C); Instruction *foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); - Instruction *foldSelectValueEquivalence(SelectInst &SI, ICmpInst &ICI); + Instruction *foldSelectValueEquivalence(SelectInst &SI, ICmpInst &ICI); Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi, bool isSigned, bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); bool mergeStoreIntoSuccessor(StoreInst &SI); - /// Given an 'or' instruction, check to see if it is part of a - /// bswap/bitreverse idiom. 
If so, return the equivalent bswap/bitreverse - /// intrinsic. - Instruction *matchBSwapOrBitReverse(BinaryOperator &Or, bool MatchBSwaps, - bool MatchBitReversals); + /// Given an 'or' instruction, check to see if it is part of a + /// bswap/bitreverse idiom. If so, return the equivalent bswap/bitreverse + /// intrinsic. + Instruction *matchBSwapOrBitReverse(BinaryOperator &Or, bool MatchBSwaps, + bool MatchBitReversals); Instruction *SimplifyAnyMemTransfer(AnyMemTransferInst *MI); Instruction *SimplifyAnyMemSet(AnyMemSetInst *MI); @@ -758,8 +758,8 @@ class Negator final { using Result = std::pair<ArrayRef<Instruction *> /*NewInstructions*/, Value * /*NegatedRoot*/>; - std::array<Value *, 2> getSortedOperandsOfBinOp(Instruction *I); - + std::array<Value *, 2> getSortedOperandsOfBinOp(Instruction *I); + LLVM_NODISCARD Value *visitImpl(Value *V, unsigned Depth); LLVM_NODISCARD Value *negate(Value *V, unsigned Depth); @@ -777,7 +777,7 @@ public: /// Attempt to negate \p Root. Retuns nullptr if negation can't be performed, /// otherwise returns negated value. LLVM_NODISCARD static Value *Negate(bool LHSIsZero, Value *Root, - InstCombinerImpl &IC); + InstCombinerImpl &IC); }; } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c7b5f6f780..f2c7e46163 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -23,7 +23,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -167,8 +167,8 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, APInt(64, AllocaSize), DL); } -static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, - AllocaInst &AI) { +static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, + AllocaInst &AI) { // Check for array size of 1 (scalar allocation). if (!AI.isArrayAllocation()) { // i32 1 is the canonical array size for scalar allocations. @@ -236,45 +236,45 @@ namespace { // instruction. 
class PointerReplacer { public: - PointerReplacer(InstCombinerImpl &IC) : IC(IC) {} - - bool collectUsers(Instruction &I); + PointerReplacer(InstCombinerImpl &IC) : IC(IC) {} + + bool collectUsers(Instruction &I); void replacePointer(Instruction &I, Value *V); private: void replace(Instruction *I); Value *getReplacement(Value *I); - SmallSetVector<Instruction *, 4> Worklist; + SmallSetVector<Instruction *, 4> Worklist; MapVector<Value *, Value *> WorkMap; - InstCombinerImpl &IC; + InstCombinerImpl &IC; }; } // end anonymous namespace -bool PointerReplacer::collectUsers(Instruction &I) { +bool PointerReplacer::collectUsers(Instruction &I) { for (auto U : I.users()) { - Instruction *Inst = cast<Instruction>(&*U); - if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { - if (Load->isVolatile()) - return false; - Worklist.insert(Load); + Instruction *Inst = cast<Instruction>(&*U); + if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + if (Load->isVolatile()) + return false; + Worklist.insert(Load); } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) { - Worklist.insert(Inst); - if (!collectUsers(*Inst)) - return false; - } else if (isa<MemTransferInst>(Inst)) { - Worklist.insert(Inst); + Worklist.insert(Inst); + if (!collectUsers(*Inst)) + return false; + } else if (isa<MemTransferInst>(Inst)) { + Worklist.insert(Inst); } else { - LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n'); - return false; + LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n'); + return false; } } - return true; + return true; } -Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); } - +Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); } + void PointerReplacer::replace(Instruction *I) { if (getReplacement(I)) return; @@ -282,12 +282,12 @@ void PointerReplacer::replace(Instruction *I) { if (auto *LT = dyn_cast<LoadInst>(I)) { auto *V = getReplacement(LT->getPointerOperand()); assert(V && "Operand not replaced"); - auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(), - LT->getAlign(), LT->getOrdering(), - LT->getSyncScopeID()); + auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(), + LT->getAlign(), LT->getOrdering(), + LT->getSyncScopeID()); NewI->takeName(LT); - copyMetadataForLoad(*NewI, *LT); - + copyMetadataForLoad(*NewI, *LT); + IC.InsertNewInstWith(NewI, *LT); IC.replaceInstUsesWith(*LT, NewI); WorkMap[LT] = NewI; @@ -310,28 +310,28 @@ void PointerReplacer::replace(Instruction *I) { IC.InsertNewInstWith(NewI, *BC); NewI->takeName(BC); WorkMap[BC] = NewI; - } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) { - auto *SrcV = getReplacement(MemCpy->getRawSource()); - // The pointer may appear in the destination of a copy, but we don't want to - // replace it. - if (!SrcV) { - assert(getReplacement(MemCpy->getRawDest()) && - "destination not in replace list"); - return; - } - - IC.Builder.SetInsertPoint(MemCpy); - auto *NewI = IC.Builder.CreateMemTransferInst( - MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(), - SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(), - MemCpy->isVolatile()); - AAMDNodes AAMD; - MemCpy->getAAMetadata(AAMD); - if (AAMD) - NewI->setAAMetadata(AAMD); - - IC.eraseInstFromFunction(*MemCpy); - WorkMap[MemCpy] = NewI; + } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) { + auto *SrcV = getReplacement(MemCpy->getRawSource()); + // The pointer may appear in the destination of a copy, but we don't want to + // replace it. 
+ if (!SrcV) { + assert(getReplacement(MemCpy->getRawDest()) && + "destination not in replace list"); + return; + } + + IC.Builder.SetInsertPoint(MemCpy); + auto *NewI = IC.Builder.CreateMemTransferInst( + MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(), + SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(), + MemCpy->isVolatile()); + AAMDNodes AAMD; + MemCpy->getAAMetadata(AAMD); + if (AAMD) + NewI->setAAMetadata(AAMD); + + IC.eraseInstFromFunction(*MemCpy); + WorkMap[MemCpy] = NewI; } else { llvm_unreachable("should never reach here"); } @@ -345,12 +345,12 @@ void PointerReplacer::replacePointer(Instruction &I, Value *V) { "Invalid usage"); #endif WorkMap[&I] = V; - - for (Instruction *Workitem : Worklist) - replace(Workitem); + + for (Instruction *Workitem : Worklist) + replace(Workitem); } -Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { +Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { if (auto *I = simplifyAllocaArraySize(*this, AI)) return I; @@ -401,21 +401,21 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { // read. SmallVector<Instruction *, 4> ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) { - Value *TheSrc = Copy->getSource(); + Value *TheSrc = Copy->getSource(); Align AllocaAlign = AI.getAlign(); Align SourceAlign = getOrEnforceKnownAlignment( - TheSrc, AllocaAlign, DL, &AI, &AC, &DT); + TheSrc, AllocaAlign, DL, &AI, &AC, &DT); if (AllocaAlign <= SourceAlign && - isDereferenceableForAllocaSize(TheSrc, &AI, DL)) { + isDereferenceableForAllocaSize(TheSrc, &AI, DL)) { LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace(); - auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace); - if (AI.getType()->getAddressSpace() == SrcAddrSpace) { - for (Instruction *Delete : ToDelete) - eraseInstFromFunction(*Delete); - - Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); + unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace(); + auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace); + if (AI.getType()->getAddressSpace() == SrcAddrSpace) { + for (Instruction *Delete : ToDelete) + eraseInstFromFunction(*Delete); + + Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); Instruction *NewI = replaceInstUsesWith(AI, Cast); eraseInstFromFunction(*Copy); ++NumGlobalCopies; @@ -423,14 +423,14 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { } PointerReplacer PtrReplacer(*this); - if (PtrReplacer.collectUsers(AI)) { - for (Instruction *Delete : ToDelete) - eraseInstFromFunction(*Delete); - - Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); - PtrReplacer.replacePointer(AI, Cast); - ++NumGlobalCopies; - } + if (PtrReplacer.collectUsers(AI)) { + for (Instruction *Delete : ToDelete) + eraseInstFromFunction(*Delete); + + Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); + PtrReplacer.replacePointer(AI, Cast); + ++NumGlobalCopies; + } } } @@ -452,9 +452,9 @@ static bool isSupportedAtomicType(Type *Ty) { /// that pointer type, load it, etc. /// /// Note that this will create all of the instructions with whatever insert -/// point the \c InstCombinerImpl currently is using. -LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy, - const Twine &Suffix) { +/// point the \c InstCombinerImpl currently is using. 
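The constant-memory branch of visitAllocaInst, together with PointerReplacer, removes an alloca that is only ever initialized by a copy from read-only memory and then read. A hedged source-level sketch of the pattern this targets; the names kTable and lookup are invented for the example.

#include <cassert>
#include <cstring>

static const int kTable[4] = {10, 20, 30, 40}; // read-only ("constant memory") source

int lookup(unsigned i) {
  int local[4];
  std::memcpy(local, kTable, sizeof(local)); // copy from constant memory into a local
  return local[i & 3u];                      // the copy is only read afterwards
}

int main() {
  assert(lookup(2) == 30);
  return 0;
}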
+LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy, + const Twine &Suffix) { assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) && "can't fold an atomic load to requested type"); @@ -476,8 +476,8 @@ LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy, /// Combine a store to a new type. /// /// Returns the newly created store instruction. -static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, - Value *V) { +static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, + Value *V) { assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) && "can't fold an atomic store of requested type"); @@ -517,7 +517,7 @@ static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, break; case LLVMContext::MD_invariant_load: case LLVMContext::MD_nonnull: - case LLVMContext::MD_noundef: + case LLVMContext::MD_noundef: case LLVMContext::MD_range: case LLVMContext::MD_align: case LLVMContext::MD_dereferenceable: @@ -535,7 +535,7 @@ static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) { assert(V->getType()->isPointerTy() && "Expected pointer type."); // Ignore possible ty* to ixx* bitcast. - V = InstCombiner::peekThroughBitcast(V); + V = InstCombiner::peekThroughBitcast(V); // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax // pattern. CmpInst::Predicate Pred; @@ -570,8 +570,8 @@ static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) { /// or a volatile load. This is debatable, and might be reasonable to change /// later. However, it is risky in case some backend or other part of LLVM is /// relying on the exact type loaded to select appropriate atomic operations. -static Instruction *combineLoadToOperationType(InstCombinerImpl &IC, - LoadInst &LI) { +static Instruction *combineLoadToOperationType(InstCombinerImpl &IC, + LoadInst &LI) { // FIXME: We could probably with some care handle both volatile and ordered // atomic loads here but it isn't clear that this is important. if (!LI.isUnordered()) @@ -586,36 +586,36 @@ static Instruction *combineLoadToOperationType(InstCombinerImpl &IC, const DataLayout &DL = IC.getDataLayout(); - // Fold away bit casts of the loaded value by loading the desired type. - // Note that we should not do this for pointer<->integer casts, - // because that would result in type punning. - if (LI.hasOneUse()) { - // Don't transform when the type is x86_amx, it makes the pass that lower - // x86_amx type happy. - if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) { - assert(!LI.getType()->isX86_AMXTy() && - "load from x86_amx* should not happen!"); - if (BC->getType()->isX86_AMXTy()) - return nullptr; + // Fold away bit casts of the loaded value by loading the desired type. + // Note that we should not do this for pointer<->integer casts, + // because that would result in type punning. + if (LI.hasOneUse()) { + // Don't transform when the type is x86_amx, it makes the pass that lower + // x86_amx type happy. 
+ if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) { + assert(!LI.getType()->isX86_AMXTy() && + "load from x86_amx* should not happen!"); + if (BC->getType()->isX86_AMXTy()) + return nullptr; } if (auto* CI = dyn_cast<CastInst>(LI.user_back())) - if (CI->isNoopCast(DL) && LI.getType()->isPtrOrPtrVectorTy() == - CI->getDestTy()->isPtrOrPtrVectorTy()) + if (CI->isNoopCast(DL) && LI.getType()->isPtrOrPtrVectorTy() == + CI->getDestTy()->isPtrOrPtrVectorTy()) if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) { LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy()); CI->replaceAllUsesWith(NewLoad); IC.eraseInstFromFunction(*CI); return &LI; } - } + } // FIXME: We should also canonicalize loads of vectors when their elements are // cast to other types. return nullptr; } -static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) { +static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) { // FIXME: We could probably with some care handle both volatile and atomic // stores here but it isn't clear that this is important. if (!LI.isSimple()) @@ -753,7 +753,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, } if (PHINode *PN = dyn_cast<PHINode>(P)) { - append_range(Worklist, PN->incoming_values()); + append_range(Worklist, PN->incoming_values()); continue; } @@ -813,9 +813,9 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, // not zero. Currently, we only handle the first such index. Also, we could // also search through non-zero constant indices if we kept track of the // offsets those indices implied. -static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, - GetElementPtrInst *GEPI, Instruction *MemI, - unsigned &Idx) { +static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, + GetElementPtrInst *GEPI, Instruction *MemI, + unsigned &Idx) { if (GEPI->getNumOperands() < 2) return false; @@ -844,17 +844,17 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, return false; SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx); - Type *SourceElementType = GEPI->getSourceElementType(); - // Size information about scalable vectors is not available, so we cannot - // deduce whether indexing at n is undefined behaviour or not. Bail out. - if (isa<ScalableVectorType>(SourceElementType)) - return false; - - Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops); + Type *SourceElementType = GEPI->getSourceElementType(); + // Size information about scalable vectors is not available, so we cannot + // deduce whether indexing at n is undefined behaviour or not. Bail out. + if (isa<ScalableVectorType>(SourceElementType)) + return false; + + Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops); if (!AllocTy || !AllocTy->isSized()) return false; const DataLayout &DL = IC.getDataLayout(); - uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedSize(); + uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedSize(); // If there are more indices after the one we might replace with a zero, make // sure they're all non-negative. If any of them are negative, the overall @@ -889,7 +889,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, // access, but the object has only one element, we can assume that the index // will always be zero. If we replace the GEP, return it. 
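Illustrative sketch (not from the patch; names invented) of why canReplaceGEPIdxWithZero is sound: when the accessed object has only one element, any in-bounds index must be zero, so the variable index can be replaced.

int first(int i) {
  int a[1] = {42};
  return a[i];   // in a well-defined program, i can only be 0 here
}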
template <typename T> -static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, +static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, T &MemI) { if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) { unsigned Idx; @@ -931,7 +931,7 @@ static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) { return false; } -Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) { +Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); // Try to canonicalize the loaded type. @@ -1048,7 +1048,7 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) { /// and the layout of a <2 x double> is isomorphic to a [2 x double], /// then %V1 can be safely approximated by a conceptual "bitcast" of %U. /// Note that %U may contain non-undef values where %V1 has undef. -static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) { +static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) { Value *U = nullptr; while (auto *IV = dyn_cast<InsertValueInst>(V)) { auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand()); @@ -1075,11 +1075,11 @@ static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) { return nullptr; } if (auto *AT = dyn_cast<ArrayType>(VT)) { - if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements()) + if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements()) return nullptr; } else { auto *ST = cast<StructType>(VT); - if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements()) + if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements()) return nullptr; for (const auto *EltT : ST->elements()) { if (EltT != UT->getElementType()) @@ -1109,7 +1109,7 @@ static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) { /// the caller must erase the store instruction. We have to let the caller erase /// the store instruction as otherwise there is no way to signal whether it was /// combined or not: IC.EraseInstFromFunction returns a null pointer. -static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) { +static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) { // FIXME: We could probably with some care handle both volatile and ordered // atomic stores here but it isn't clear that this is important. if (!SI.isUnordered()) @@ -1123,13 +1123,13 @@ static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) { // Fold away bit casts of the stored value by storing the original type. if (auto *BC = dyn_cast<BitCastInst>(V)) { - assert(!BC->getType()->isX86_AMXTy() && - "store to x86_amx* should not happen!"); + assert(!BC->getType()->isX86_AMXTy() && + "store to x86_amx* should not happen!"); V = BC->getOperand(0); - // Don't transform when the type is x86_amx, it makes the pass that lower - // x86_amx type happy. - if (V->getType()->isX86_AMXTy()) - return false; + // Don't transform when the type is x86_amx, it makes the pass that lower + // x86_amx type happy. 
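Rough analogue (illustration only, names invented) of likeBitCastFromVector above: rebuilding an aggregate element-by-element from a vector with an isomorphic layout is a conceptual bitcast of that vector.

#include <array>
#include <cstring>

struct Pair { double a, b; };

Pair rebuild(const std::array<double, 2> &v) {
  Pair p{v[0], v[1]};                     // insertvalue-of-extractelement chain
  Pair q;
  std::memcpy(&q, v.data(), sizeof(q));   // the conceptual "bitcast" of the vector
  // p and q hold the same two doubles; the fold prefers the cheaper form.
  return p;
}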
+ if (V->getType()->isX86_AMXTy()) + return false; if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) { combineStoreToNewValue(IC, SI, V); return true; @@ -1147,7 +1147,7 @@ static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) { return false; } -static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) { +static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) { // FIXME: We could probably with some care handle both volatile and atomic // stores here but it isn't clear that this is important. if (!SI.isSimple()) @@ -1287,7 +1287,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { /// Converts store (bitcast (load (bitcast (select ...)))) to /// store (load (select ...)), where select is minmax: /// select ((cmp load V1, load V2), V1, V2). -static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC, +static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC, StoreInst &SI) { // bitcast? if (!match(SI.getPointerOperand(), m_BitCast(m_Value()))) @@ -1317,8 +1317,8 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC, if (!all_of(LI->users(), [LI, LoadAddr](User *U) { auto *SI = dyn_cast<StoreInst>(U); return SI && SI->getPointerOperand() != LI && - InstCombiner::peekThroughBitcast(SI->getPointerOperand()) != - LoadAddr && + InstCombiner::peekThroughBitcast(SI->getPointerOperand()) != + LoadAddr && !SI->getPointerOperand()->isSwiftError(); })) return false; @@ -1336,7 +1336,7 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC, return true; } -Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { +Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); @@ -1455,7 +1455,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { /// or: /// *P = v1; if () { *P = v2; } /// into a phi node with a store in the successor. -bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { +bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { if (!SI.isUnordered()) return false; // This code has not been audited for volatile/ordered case. diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 4b485a0ad8..22cb22b49a 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -32,7 +32,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include <cassert> #include <cstddef> @@ -47,7 +47,7 @@ using namespace PatternMatch; /// The specific integer value is used in a context where it is known to be /// non-zero. If this allows us to simplify the computation, do so and return /// the new operand, otherwise return null. -static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC, +static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC, Instruction &CxtI) { // If V has multiple uses, then we would have to do more analysis to determine // if this is safe. 
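Illustrative sketch (not from the patch; names invented) of the store shape mergeStoreIntoSuccessor rewrites: both stores write the same location, so they become a phi of the values plus a single store after the branch.

void set(int *p, bool c, int v1, int v2) {
  *p = v1;
  if (c)
    *p = v2;
  // after the transform: int t = c ? v2 : v1;  *p = t;   (one store, in the successor)
}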
For example, the use could be in dynamically unreached @@ -139,7 +139,7 @@ static Value *foldMulSelectToNegate(BinaryOperator &I, return nullptr; } -Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -153,9 +153,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - unsigned BitWidth = I.getType()->getScalarSizeInBits(); - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + unsigned BitWidth = I.getType()->getScalarSizeInBits(); + // X * -1 == 0 - X if (match(Op1, m_AllOnes())) { BinaryOperator *BO = BinaryOperator::CreateNeg(Op0, I.getName()); @@ -186,7 +186,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) { // Replace X*(2^C) with X << C, where C is either a scalar or a vector. - if (Constant *NewCst = ConstantExpr::getExactLogBase2(C1)) { + if (Constant *NewCst = ConstantExpr::getExactLogBase2(C1)) { BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst); if (I.hasNoUnsignedWrap()) @@ -202,12 +202,12 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { } } - if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) { - // Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation. - // The "* (1<<C)" thus becomes a potential shifting opportunity. - if (Value *NegOp0 = Negator::Negate(/*IsNegation*/ true, Op0, *this)) - return BinaryOperator::CreateMul( - NegOp0, ConstantExpr::getNeg(cast<Constant>(Op1)), I.getName()); + if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) { + // Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation. + // The "* (1<<C)" thus becomes a potential shifting opportunity. 
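Standalone sanity sketch (not from the patch) for two of the multiply folds above, checked over a small domain; the shift form assumes the usual wrap-around unsigned semantics.

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x) {
    assert(x * -1 == 0 - x);                  // X * -1   ==>  0 - X
    uint8_t u = static_cast<uint8_t>(x);
    assert(static_cast<uint8_t>(u * 8) ==
           static_cast<uint8_t>(u << 3));     // X * 2^C  ==>  X << C
  }
}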
+ if (Value *NegOp0 = Negator::Negate(/*IsNegation*/ true, Op0, *this)) + return BinaryOperator::CreateMul( + NegOp0, ConstantExpr::getNeg(cast<Constant>(Op1)), I.getName()); } if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) @@ -237,9 +237,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) return BinaryOperator::CreateMul(X, X); - - if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) - return BinaryOperator::CreateMul(X, X); + + if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) + return BinaryOperator::CreateMul(X, X); } // -X * C --> X * -C @@ -362,19 +362,19 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1) return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0); - // ((ashr X, 31) | 1) * X --> abs(X) - // X * ((ashr X, 31) | 1) --> abs(X) - if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X), - m_SpecificIntAllowUndef(BitWidth - 1)), - m_One()), - m_Deferred(X)))) { - Value *Abs = Builder.CreateBinaryIntrinsic( - Intrinsic::abs, X, - ConstantInt::getBool(I.getContext(), I.hasNoSignedWrap())); - Abs->takeName(&I); - return replaceInstUsesWith(I, Abs); - } - + // ((ashr X, 31) | 1) * X --> abs(X) + // X * ((ashr X, 31) | 1) --> abs(X) + if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X), + m_SpecificIntAllowUndef(BitWidth - 1)), + m_One()), + m_Deferred(X)))) { + Value *Abs = Builder.CreateBinaryIntrinsic( + Intrinsic::abs, X, + ConstantInt::getBool(I.getContext(), I.hasNoSignedWrap())); + Abs->takeName(&I); + return replaceInstUsesWith(I, Abs); + } + if (Instruction *Ext = narrowMathIfNoOverflow(I)) return Ext; @@ -392,7 +392,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { return Changed ? &I : nullptr; } -Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { +Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { BinaryOperator::BinaryOps Opcode = I.getOpcode(); assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) && "Expected fmul or fdiv"); @@ -407,12 +407,12 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { // fabs(X) * fabs(X) -> X * X // fabs(X) / fabs(X) -> X / X - if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X)))) + if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X)))) return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I); // fabs(X) * fabs(Y) --> fabs(X * Y) // fabs(X) / fabs(Y) --> fabs(X / Y) - if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) && + if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) && (Op0->hasOneUse() || Op1->hasOneUse())) { IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.setFastMathFlags(I.getFastMathFlags()); @@ -425,7 +425,7 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { if (Value *V = SimplifyFMulInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) @@ -521,21 +521,21 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { return replaceInstUsesWith(I, Sqrt); } - // The following transforms are done irrespective of the number of uses - // for the expression "1.0/sqrt(X)". 
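Standalone sanity sketch (not from the patch) for the ((ashr X, 31) | 1) * X --> abs(X) fold above. Checked away from INT_MIN, where both sides would overflow; it assumes arithmetic right shift of signed values, which C++20 guarantees.

#include <cassert>
#include <cstdlib>

int main() {
  for (int x = -1000; x <= 1000; ++x)
    assert(((x >> 31) | 1) * x == std::abs(x));
}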
- // 1) 1.0/sqrt(X) * X -> X/sqrt(X) - // 2) X * 1.0/sqrt(X) -> X/sqrt(X) - // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it - // has the necessary (reassoc) fast-math-flags. - if (I.hasNoSignedZeros() && - match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op1 == X) - return BinaryOperator::CreateFDivFMF(X, Y, &I); - if (I.hasNoSignedZeros() && - match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op0 == X) - return BinaryOperator::CreateFDivFMF(X, Y, &I); - + // The following transforms are done irrespective of the number of uses + // for the expression "1.0/sqrt(X)". + // 1) 1.0/sqrt(X) * X -> X/sqrt(X) + // 2) X * 1.0/sqrt(X) -> X/sqrt(X) + // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it + // has the necessary (reassoc) fast-math-flags. + if (I.hasNoSignedZeros() && + match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op1 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + if (I.hasNoSignedZeros() && + match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op0 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + // Like the similar transform in instsimplify, this requires 'nsz' because // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0. if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && @@ -620,7 +620,7 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { /// Fold a divide or remainder with a select instruction divisor when one of the /// select operands is zero. In that case, we can use the other select operand /// because div/rem by zero is undefined. -bool InstCombinerImpl::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) { +bool InstCombinerImpl::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) { SelectInst *SI = dyn_cast<SelectInst>(I.getOperand(1)); if (!SI) return false; @@ -721,7 +721,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, /// instructions (udiv and sdiv). It is called by the visitors to those integer /// division instructions. /// Common integer divide transforms -Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { +Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); bool IsSigned = I.getOpcode() == Instruction::SDiv; Type *Ty = I.getType(); @@ -857,7 +857,7 @@ namespace { using FoldUDivOperandCb = Instruction *(*)(Value *Op0, Value *Op1, const BinaryOperator &I, - InstCombinerImpl &IC); + InstCombinerImpl &IC); /// Used to maintain state for visitUDivOperand(). 
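Standalone sketch (not from the patch) of the 'nsz' caveat noted above: sqrt(-0.0) is -0.0, and (-0.0) * (-0.0) is +0.0, so folding sqrt(X)*sqrt(X) to X would flip the sign of zero; hence the no-signed-zeros requirement.

#include <cassert>
#include <cmath>

int main() {
  double x = -0.0;
  double s = std::sqrt(x);        // IEEE: sqrt(-0.0) == -0.0
  assert(std::signbit(s));        // still a negative zero
  assert(!std::signbit(s * s));   // but the square is +0.0, not x
}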
struct UDivFoldAction { @@ -886,9 +886,9 @@ struct UDivFoldAction { // X udiv 2^C -> X >> C static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, - const BinaryOperator &I, - InstCombinerImpl &IC) { - Constant *C1 = ConstantExpr::getExactLogBase2(cast<Constant>(Op1)); + const BinaryOperator &I, + InstCombinerImpl &IC) { + Constant *C1 = ConstantExpr::getExactLogBase2(cast<Constant>(Op1)); if (!C1) llvm_unreachable("Failed to constant fold udiv -> logbase2"); BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, C1); @@ -900,7 +900,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) // X udiv (zext (C1 << N)), where C1 is "1<<C2" --> X >> (N+C2) static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { Value *ShiftLeft; if (!match(Op1, m_ZExt(m_Value(ShiftLeft)))) ShiftLeft = Op1; @@ -909,7 +909,7 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, Value *N; if (!match(ShiftLeft, m_Shl(m_Constant(CI), m_Value(N)))) llvm_unreachable("match should never fail here!"); - Constant *Log2Base = ConstantExpr::getExactLogBase2(CI); + Constant *Log2Base = ConstantExpr::getExactLogBase2(CI); if (!Log2Base) llvm_unreachable("getLogBase2 should never fail here!"); N = IC.Builder.CreateAdd(N, Log2Base); @@ -928,8 +928,8 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, SmallVectorImpl<UDivFoldAction> &Actions, unsigned Depth = 0) { - // FIXME: assert that Op1 isn't/doesn't contain undef. - + // FIXME: assert that Op1 isn't/doesn't contain undef. + // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. if (match(Op1, m_Power2())) { @@ -949,9 +949,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, return 0; if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - // FIXME: missed optimization: if one of the hands of select is/contains - // undef, just directly pick the other one. - // FIXME: can both hands contain undef? + // FIXME: missed optimization: if one of the hands of select is/contains + // undef, just directly pick the other one. + // FIXME: can both hands contain undef? 
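Standalone sanity sketch (not from the patch) for the udiv-by-power-of-two folds above.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 4096; ++x) {
    assert(x / 8u == x >> 3);                  // X udiv 2^C    ==>  X >> C
    for (uint32_t n = 0; n < 8; ++n)
      assert(x / (4u << n) == x >> (n + 2));   // X udiv (4<<N) ==>  X >> (N+2)
  }
}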
if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions, Depth)) if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions, Depth)) { @@ -999,7 +999,7 @@ static Instruction *narrowUDivURem(BinaryOperator &I, return nullptr; } -Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyUDivInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1093,7 +1093,7 @@ Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifySDivInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1106,7 +1106,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { return Common; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - Type *Ty = I.getType(); + Type *Ty = I.getType(); Value *X; // sdiv Op0, -1 --> -Op0 // sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined) @@ -1116,24 +1116,24 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { // X / INT_MIN --> X == INT_MIN if (match(Op1, m_SignMask())) - return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty); - - // sdiv exact X, 1<<C --> ashr exact X, C iff 1<<C is non-negative - // sdiv exact X, -1<<C --> -(ashr exact X, C) - if (I.isExact() && ((match(Op1, m_Power2()) && match(Op1, m_NonNegative())) || - match(Op1, m_NegatedPower2()))) { - bool DivisorWasNegative = match(Op1, m_NegatedPower2()); - if (DivisorWasNegative) - Op1 = ConstantExpr::getNeg(cast<Constant>(Op1)); - auto *AShr = BinaryOperator::CreateExactAShr( - Op0, ConstantExpr::getExactLogBase2(cast<Constant>(Op1)), I.getName()); - if (!DivisorWasNegative) - return AShr; - Builder.Insert(AShr); - AShr->setName(I.getName() + ".neg"); - return BinaryOperator::CreateNeg(AShr, I.getName()); - } - + return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty); + + // sdiv exact X, 1<<C --> ashr exact X, C iff 1<<C is non-negative + // sdiv exact X, -1<<C --> -(ashr exact X, C) + if (I.isExact() && ((match(Op1, m_Power2()) && match(Op1, m_NonNegative())) || + match(Op1, m_NegatedPower2()))) { + bool DivisorWasNegative = match(Op1, m_NegatedPower2()); + if (DivisorWasNegative) + Op1 = ConstantExpr::getNeg(cast<Constant>(Op1)); + auto *AShr = BinaryOperator::CreateExactAShr( + Op0, ConstantExpr::getExactLogBase2(cast<Constant>(Op1)), I.getName()); + if (!DivisorWasNegative) + return AShr; + Builder.Insert(AShr); + AShr->setName(I.getName() + ".neg"); + return BinaryOperator::CreateNeg(AShr, I.getName()); + } + const APInt *Op1C; if (match(Op1, m_APInt(Op1C))) { // If the dividend is sign-extended and the constant divisor is small enough @@ -1150,7 +1150,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { Constant *NarrowDivisor = ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType()); Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor); - return new SExtInst(NarrowOp, Ty); + return new SExtInst(NarrowOp, Ty); } // -X / C --> X / -C (if the negation doesn't overflow). @@ -1158,7 +1158,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { // checking if all elements are not the min-signed-val. 
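Standalone sanity sketch (not from the patch) for two of the sdiv folds above; INT_MIN / -1 is excluded because that division overflows.

#include <cassert>
#include <climits>

int main() {
  const int vals[] = {INT_MIN, -7, -1, 0, 1, 42, INT_MAX};
  for (int x : vals) {
    assert(x / INT_MIN == (x == INT_MIN));   // X / INT_MIN ==> X == INT_MIN
    if (x != INT_MIN)
      assert(x / -1 == -x);                  // sdiv X, -1  ==> -X
  }
}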
if (!Op1C->isMinSignedValue() && match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) { - Constant *NegC = ConstantInt::get(Ty, -(*Op1C)); + Constant *NegC = ConstantInt::get(Ty, -(*Op1C)); Instruction *BO = BinaryOperator::CreateSDiv(X, NegC); BO->setIsExact(I.isExact()); return BO; @@ -1171,19 +1171,19 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { return BinaryOperator::CreateNSWNeg( Builder.CreateSDiv(X, Y, I.getName(), I.isExact())); - // abs(X) / X --> X > -1 ? 1 : -1 - // X / abs(X) --> X > -1 ? 1 : -1 - if (match(&I, m_c_BinOp( - m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())), - m_Deferred(X)))) { - Constant *NegOne = ConstantInt::getAllOnesValue(Ty); - Value *Cond = Builder.CreateICmpSGT(X, NegOne); - return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), NegOne); - } - + // abs(X) / X --> X > -1 ? 1 : -1 + // X / abs(X) --> X > -1 ? 1 : -1 + if (match(&I, m_c_BinOp( + m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())), + m_Deferred(X)))) { + Constant *NegOne = ConstantInt::getAllOnesValue(Ty); + Value *Cond = Builder.CreateICmpSGT(X, NegOne); + return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), NegOne); + } + // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - APInt Mask(APInt::getSignMask(Ty->getScalarSizeInBits())); + APInt Mask(APInt::getSignMask(Ty->getScalarSizeInBits())); if (MaskedValueIsZero(Op0, Mask, 0, &I)) { if (MaskedValueIsZero(Op1, Mask, 0, &I)) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set @@ -1192,13 +1192,13 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { return BO; } - if (match(Op1, m_NegatedPower2())) { - // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) -> - // -> -(X udiv (1 << C)) -> -(X u>> C) - return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst( - Op0, ConstantExpr::getNeg(cast<Constant>(Op1)), I, *this))); - } - + if (match(Op1, m_NegatedPower2())) { + // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) -> + // -> -(X udiv (1 << C)) -> -(X u>> C) + return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst( + Op0, ConstantExpr::getNeg(cast<Constant>(Op1)), I, *this))); + } + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is @@ -1275,7 +1275,7 @@ static Instruction *foldFDivConstantDividend(BinaryOperator &I) { return BinaryOperator::CreateFDivFMF(NewC, X, &I); } -Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyFDivInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) @@ -1367,8 +1367,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { // X / fabs(X) -> copysign(1.0, X) // fabs(X) / X -> copysign(1.0, X) if (I.hasNoNaNs() && I.hasNoInfs() && - (match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) || - match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) { + (match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) || + match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) { Value *V = Builder.CreateBinaryIntrinsic( Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I); return replaceInstUsesWith(I, V); @@ -1380,7 +1380,7 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { /// instructions (urem and srem). It is called by the visitors to those integer /// remainder instructions. 
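Standalone sanity sketch (not from the patch) for the abs(X)/X fold above; X == 0 is skipped (division by zero) and the range avoids INT_MIN, where abs overflows.

#include <cassert>
#include <cstdlib>

int main() {
  for (int x = -100; x <= 100; ++x) {
    if (x == 0)
      continue;
    assert(std::abs(x) / x == (x > -1 ? 1 : -1));   // abs(X)/X ==> X > -1 ? 1 : -1
    assert(x / std::abs(x) == (x > -1 ? 1 : -1));   // X/abs(X) ==> same select
  }
}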
/// Common integer remainder transforms -Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) { +Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // The RHS is known non-zero. @@ -1418,7 +1418,7 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) { if (Value *V = SimplifyURemInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1469,7 +1469,7 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { if (Value *V = SimplifySRemInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1492,7 +1492,7 @@ Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { // -X srem Y --> -(X srem Y) Value *X, *Y; if (match(&I, m_SRem(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y)))) - return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y)); + return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y)); // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. @@ -1506,7 +1506,7 @@ Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { // If it's a constant vector, flip any negative values positive. if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) { Constant *C = cast<Constant>(Op1); - unsigned VWidth = cast<FixedVectorType>(C->getType())->getNumElements(); + unsigned VWidth = cast<FixedVectorType>(C->getType())->getNumElements(); bool hasNegative = false; bool hasMissing = false; @@ -1541,7 +1541,7 @@ Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) { if (Value *V = SimplifyFRemInst(I.getOperand(0), I.getOperand(1), I.getFastMathFlags(), SQ.getWithInstruction(&I))) diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineNegator.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineNegator.cpp index 7718c8b0ee..d5c83dd0ba 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -42,9 +42,9 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" -#include <cassert> -#include <cstdint> +#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include <cassert> +#include <cstdint> #include <functional> #include <tuple> #include <type_traits> @@ -115,19 +115,19 @@ Negator::~Negator() { } #endif -// Due to the InstCombine's worklist management, there are no guarantees that -// each instruction we'll encounter has been visited by InstCombine already. -// In particular, most importantly for us, that means we have to canonicalize -// constants to RHS ourselves, since that is helpful sometimes. 
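Standalone sanity sketch (not from the patch) for the srem negation fold above: the remainder takes the sign of the dividend, so negating the dividend negates the remainder.

#include <cassert>

int main() {
  for (int x = -50; x <= 50; ++x)
    for (int y = 1; y <= 10; ++y)
      assert((-x) % y == -(x % y));   // -X srem Y ==> -(X srem Y)
}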
-std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { - assert(I->getNumOperands() == 2 && "Only for binops!"); - std::array<Value *, 2> Ops{I->getOperand(0), I->getOperand(1)}; - if (I->isCommutative() && InstCombiner::getComplexity(I->getOperand(0)) < - InstCombiner::getComplexity(I->getOperand(1))) - std::swap(Ops[0], Ops[1]); - return Ops; -} - +// Due to the InstCombine's worklist management, there are no guarantees that +// each instruction we'll encounter has been visited by InstCombine already. +// In particular, most importantly for us, that means we have to canonicalize +// constants to RHS ourselves, since that is helpful sometimes. +std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) { + assert(I->getNumOperands() == 2 && "Only for binops!"); + std::array<Value *, 2> Ops{I->getOperand(0), I->getOperand(1)}; + if (I->isCommutative() && InstCombiner::getComplexity(I->getOperand(0)) < + InstCombiner::getComplexity(I->getOperand(1))) + std::swap(Ops[0], Ops[1]); + return Ops; +} + // FIXME: can this be reworked into a worklist-based algorithm while preserving // the depth-first, early bailout traversal? LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { @@ -172,13 +172,13 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { // In some cases we can give the answer without further recursion. switch (I->getOpcode()) { - case Instruction::Add: { - std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); + case Instruction::Add: { + std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); // `inc` is always negatible. - if (match(Ops[1], m_One())) - return Builder.CreateNot(Ops[0], I->getName() + ".neg"); + if (match(Ops[1], m_One())) + return Builder.CreateNot(Ops[0], I->getName() + ".neg"); break; - } + } case Instruction::Xor: // `not` is always negatible. if (match(I, m_Not(m_Value(X)))) @@ -199,10 +199,10 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { } return BO; } - // While we could negate exact arithmetic shift: - // ashr exact %x, C --> sdiv exact i8 %x, -1<<C - // iff C != 0 and C u< bitwidth(%x), we don't want to, - // because division is *THAT* much worse than a shift. + // While we could negate exact arithmetic shift: + // ashr exact %x, C --> sdiv exact i8 %x, -1<<C + // iff C != 0 and C u< bitwidth(%x), we don't want to, + // because division is *THAT* much worse than a shift. break; } case Instruction::SExt: @@ -219,15 +219,15 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { break; // Other instructions require recursive reasoning. } - if (I->getOpcode() == Instruction::Sub && - (I->hasOneUse() || match(I->getOperand(0), m_ImmConstant()))) { - // `sub` is always negatible. - // However, only do this either if the old `sub` doesn't stick around, or - // it was subtracting from a constant. Otherwise, this isn't profitable. - return Builder.CreateSub(I->getOperand(1), I->getOperand(0), - I->getName() + ".neg"); - } - + if (I->getOpcode() == Instruction::Sub && + (I->hasOneUse() || match(I->getOperand(0), m_ImmConstant()))) { + // `sub` is always negatible. + // However, only do this either if the old `sub` doesn't stick around, or + // it was subtracting from a constant. Otherwise, this isn't profitable. + return Builder.CreateSub(I->getOperand(1), I->getOperand(0), + I->getName() + ".neg"); + } + // Some other cases, while still don't require recursion, // are restricted to the one-use case. 
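Standalone sanity sketch (not from the patch) for two recursion-free Negator cases above: an "inc" negates to a "not", and a sub negates by swapping its operands.

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t a = -200; a <= 200; ++a)
    for (int32_t b = -200; b <= 200; ++b) {
      assert(-(a + 1) == ~a);      // neg(add a, 1) ==> not a
      assert(-(a - b) == b - a);   // neg(sub a, b) ==> sub b, a
    }
}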
if (!V->hasOneUse()) @@ -239,8 +239,8 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { // While this is normally not behind a use-check, // let's consider division to be special since it's costly. if (auto *Op1C = dyn_cast<Constant>(I->getOperand(1))) { - if (!Op1C->containsUndefOrPoisonElement() && - Op1C->isNotMinSignedValue() && Op1C->isNotOneValue()) { + if (!Op1C->containsUndefOrPoisonElement() && + Op1C->isNotMinSignedValue() && Op1C->isNotOneValue()) { Value *BO = Builder.CreateSDiv(I->getOperand(0), ConstantExpr::getNeg(Op1C), I->getName() + ".neg"); @@ -261,13 +261,13 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { } switch (I->getOpcode()) { - case Instruction::Freeze: { - // `freeze` is negatible if its operand is negatible. - Value *NegOp = negate(I->getOperand(0), Depth + 1); - if (!NegOp) // Early return. - return nullptr; - return Builder.CreateFreeze(NegOp, I->getName() + ".neg"); - } + case Instruction::Freeze: { + // `freeze` is negatible if its operand is negatible. + Value *NegOp = negate(I->getOperand(0), Depth + 1); + if (!NegOp) // Early return. + return nullptr; + return Builder.CreateFreeze(NegOp, I->getName() + ".neg"); + } case Instruction::PHI: { // `phi` is negatible if all the incoming values are negatible. auto *PHI = cast<PHINode>(I); @@ -285,16 +285,16 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { return NegatedPHI; } case Instruction::Select: { - if (isKnownNegation(I->getOperand(1), I->getOperand(2))) { - // Of one hand of select is known to be negation of another hand, - // just swap the hands around. - auto *NewSelect = cast<SelectInst>(I->clone()); - // Just swap the operands of the select. - NewSelect->swapValues(); - // Don't swap prof metadata, we didn't change the branch behavior. - NewSelect->setName(I->getName() + ".neg"); - Builder.Insert(NewSelect); - return NewSelect; + if (isKnownNegation(I->getOperand(1), I->getOperand(2))) { + // Of one hand of select is known to be negation of another hand, + // just swap the hands around. + auto *NewSelect = cast<SelectInst>(I->clone()); + // Just swap the operands of the select. + NewSelect->swapValues(); + // Don't swap prof metadata, we didn't change the branch behavior. + NewSelect->setName(I->getName() + ".neg"); + Builder.Insert(NewSelect); + return NewSelect; } // `select` is negatible if both hands of `select` are negatible. Value *NegOp1 = negate(I->getOperand(1), Depth + 1); @@ -350,81 +350,81 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { } case Instruction::Shl: { // `shl` is negatible if the first operand is negatible. - if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) - return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); - // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`. - auto *Op1C = dyn_cast<Constant>(I->getOperand(1)); - if (!Op1C) // Early return. + if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) + return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); + // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`. + auto *Op1C = dyn_cast<Constant>(I->getOperand(1)); + if (!Op1C) // Early return. 
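Standalone sanity sketch (not from the patch) for the sdiv case above: with a constant divisor that is none of the guarded values (0, 1, -1, INT_MIN), the negation can be folded into the divisor.

#include <cassert>

int main() {
  const int divisors[] = {-7, -3, -2, 2, 3, 7};
  for (int a = -100; a <= 100; ++a)
    for (int c : divisors)
      assert(-(a / c) == a / -c);   // neg(sdiv a, C) ==> sdiv a, -C
}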
return nullptr; - return Builder.CreateMul( - I->getOperand(0), - ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C), - I->getName() + ".neg"); + return Builder.CreateMul( + I->getOperand(0), + ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C), + I->getName() + ".neg"); } - case Instruction::Or: { + case Instruction::Or: { if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I, &DT)) return nullptr; // Don't know how to handle `or` in general. - std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); + std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); // `or`/`add` are interchangeable when operands have no common bits set. // `inc` is always negatible. - if (match(Ops[1], m_One())) - return Builder.CreateNot(Ops[0], I->getName() + ".neg"); + if (match(Ops[1], m_One())) + return Builder.CreateNot(Ops[0], I->getName() + ".neg"); // Else, just defer to Instruction::Add handling. LLVM_FALLTHROUGH; - } + } case Instruction::Add: { // `add` is negatible if both of its operands are negatible. - SmallVector<Value *, 2> NegatedOps, NonNegatedOps; - for (Value *Op : I->operands()) { - // Can we sink the negation into this operand? - if (Value *NegOp = negate(Op, Depth + 1)) { - NegatedOps.emplace_back(NegOp); // Successfully negated operand! - continue; - } - // Failed to sink negation into this operand. IFF we started from negation - // and we manage to sink negation into one operand, we can still do this. - if (!IsTrulyNegation) - return nullptr; - NonNegatedOps.emplace_back(Op); // Just record which operand that was. - } - assert((NegatedOps.size() + NonNegatedOps.size()) == 2 && - "Internal consistency sanity check."); - // Did we manage to sink negation into both of the operands? - if (NegatedOps.size() == 2) // Then we get to keep the `add`! - return Builder.CreateAdd(NegatedOps[0], NegatedOps[1], - I->getName() + ".neg"); - assert(IsTrulyNegation && "We should have early-exited then."); - // Completely failed to sink negation? - if (NonNegatedOps.size() == 2) + SmallVector<Value *, 2> NegatedOps, NonNegatedOps; + for (Value *Op : I->operands()) { + // Can we sink the negation into this operand? + if (Value *NegOp = negate(Op, Depth + 1)) { + NegatedOps.emplace_back(NegOp); // Successfully negated operand! + continue; + } + // Failed to sink negation into this operand. IFF we started from negation + // and we manage to sink negation into one operand, we can still do this. + if (!IsTrulyNegation) + return nullptr; + NonNegatedOps.emplace_back(Op); // Just record which operand that was. + } + assert((NegatedOps.size() + NonNegatedOps.size()) == 2 && + "Internal consistency sanity check."); + // Did we manage to sink negation into both of the operands? + if (NegatedOps.size() == 2) // Then we get to keep the `add`! + return Builder.CreateAdd(NegatedOps[0], NegatedOps[1], + I->getName() + ".neg"); + assert(IsTrulyNegation && "We should have early-exited then."); + // Completely failed to sink negation? + if (NonNegatedOps.size() == 2) return nullptr; - // 0-(a+b) --> (-a)-b - return Builder.CreateSub(NegatedOps[0], NonNegatedOps[0], - I->getName() + ".neg"); + // 0-(a+b) --> (-a)-b + return Builder.CreateSub(NegatedOps[0], NonNegatedOps[0], + I->getName() + ".neg"); } - case Instruction::Xor: { - std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); + case Instruction::Xor: { + std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); // `xor` is negatible if one of its operands is invertible. // FIXME: InstCombineInverter? 
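Standalone sanity sketch (not from the patch) for the shl and add cases above, using unsigned wrap-around arithmetic to sidestep signed overflow.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1024; ++x) {
    assert(0 - (x << 3) == (0 - x) << 3);           // neg(shl x, 3) ==> shl (neg x), 3
    assert(0 - (x << 3) == x * (0u - (1u << 3)));   // ... or mul x, (-1 << 3)
    for (uint32_t y = 0; y < 8; ++y)
      assert(0 - (x + y) == (0 - x) - y);           // 0-(a+b) ==> (-a)-b
  }
}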
But how to connect Inverter and Negator? - if (auto *C = dyn_cast<Constant>(Ops[1])) { - Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C)); + if (auto *C = dyn_cast<Constant>(Ops[1])) { + Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C)); return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1), I->getName() + ".neg"); } return nullptr; - } + } case Instruction::Mul: { - std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); + std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I); // `mul` is negatible if one of its operands is negatible. Value *NegatedOp, *OtherOp; // First try the second operand, in case it's a constant it will be best to // just invert it instead of sinking the `neg` deeper. - if (Value *NegOp1 = negate(Ops[1], Depth + 1)) { + if (Value *NegOp1 = negate(Ops[1], Depth + 1)) { NegatedOp = NegOp1; - OtherOp = Ops[0]; - } else if (Value *NegOp0 = negate(Ops[0], Depth + 1)) { + OtherOp = Ops[0]; + } else if (Value *NegOp0 = negate(Ops[0], Depth + 1)) { NegatedOp = NegOp0; - OtherOp = Ops[1]; + OtherOp = Ops[1]; } else // Can't negate either of them. return nullptr; @@ -487,7 +487,7 @@ LLVM_NODISCARD Optional<Negator::Result> Negator::run(Value *Root) { } LLVM_NODISCARD Value *Negator::Negate(bool LHSIsZero, Value *Root, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { ++NegatorTotalNegationsAttempted; LLVM_DEBUG(dbgs() << "Negator: attempting to sink negation into " << *Root << "\n"); diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombinePHI.cpp index b211b08136..e05dda3670 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -13,14 +13,14 @@ #include "InstCombineInternal.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" - + using namespace llvm; using namespace llvm::PatternMatch; @@ -30,16 +30,16 @@ static cl::opt<unsigned> MaxNumPhis("instcombine-max-num-phis", cl::init(512), cl::desc("Maximum number phis to handle in intptr/ptrint folding")); -STATISTIC(NumPHIsOfInsertValues, - "Number of phi-of-insertvalue turned into insertvalue-of-phis"); -STATISTIC(NumPHIsOfExtractValues, - "Number of phi-of-extractvalue turned into extractvalue-of-phi"); -STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); - +STATISTIC(NumPHIsOfInsertValues, + "Number of phi-of-insertvalue turned into insertvalue-of-phis"); +STATISTIC(NumPHIsOfExtractValues, + "Number of phi-of-extractvalue turned into extractvalue-of-phi"); +STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); + /// The PHI arguments will be folded into a single operation with a PHI node /// as input. The debug location of the single operation will be the merged /// locations of the original PHI node arguments. 
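Standalone sanity sketch (not from the patch) for the xor and mul cases above: negating an xor with a constant inverts the constant and adds one; negating a mul sinks the negation into either operand (unsigned wrap-around arithmetic).

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t c = 0xA5;
  for (uint32_t x = 0; x < 4096; ++x) {
    assert(0 - (x ^ c) == (x ^ ~c) + 1);   // neg(xor x, C) ==> add (xor x, ~C), 1
    assert(0 - (x * c) == x * (0 - c));    // neg(mul x, C) ==> mul x, (neg C)
  }
}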
-void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { +void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { auto *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); Inst->setDebugLoc(FirstInst->getDebugLoc()); // We do not expect a CallInst here, otherwise, N-way merging of DebugLoc @@ -102,7 +102,7 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { // ptr_val_inc = ... // ... // -Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { +Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { if (!PN.getType()->isIntegerTy()) return nullptr; if (!PN.hasOneUse()) @@ -299,86 +299,86 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { IntToPtr->getOperand(0)->getType()); } -/// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)], -/// turn this into a phi[a,c] and phi[b,d] and a single insertvalue. -Instruction * -InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) { - auto *FirstIVI = cast<InsertValueInst>(PN.getIncomingValue(0)); - - // Scan to see if all operands are `insertvalue`'s with the same indicies, - // and all have a single use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i)); - if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices()) - return nullptr; - } - - // For each operand of an `insertvalue` - std::array<PHINode *, 2> NewOperands; - for (int OpIdx : {0, 1}) { - auto *&NewOperand = NewOperands[OpIdx]; - // Create a new PHI node to receive the values the operand has in each - // incoming basic block. - NewOperand = PHINode::Create( - FirstIVI->getOperand(OpIdx)->getType(), PN.getNumIncomingValues(), - FirstIVI->getOperand(OpIdx)->getName() + ".pn"); - // And populate each operand's PHI with said values. - for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) - NewOperand->addIncoming( - cast<InsertValueInst>(std::get<1>(Incoming))->getOperand(OpIdx), - std::get<0>(Incoming)); - InsertNewInstBefore(NewOperand, PN); - } - - // And finally, create `insertvalue` over the newly-formed PHI nodes. - auto *NewIVI = InsertValueInst::Create(NewOperands[0], NewOperands[1], - FirstIVI->getIndices(), PN.getName()); - - PHIArgMergedDebugLoc(NewIVI, PN); - ++NumPHIsOfInsertValues; - return NewIVI; -} - -/// If we have something like phi [extractvalue(a,0), extractvalue(b,0)], -/// turn this into a phi[a,b] and a single extractvalue. -Instruction * -InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) { - auto *FirstEVI = cast<ExtractValueInst>(PN.getIncomingValue(0)); - - // Scan to see if all operands are `extractvalue`'s with the same indicies, - // and all have a single use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i)); - if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() || - I->getAggregateOperand()->getType() != - FirstEVI->getAggregateOperand()->getType()) - return nullptr; - } - - // Create a new PHI node to receive the values the aggregate operand has - // in each incoming basic block. - auto *NewAggregateOperand = PHINode::Create( - FirstEVI->getAggregateOperand()->getType(), PN.getNumIncomingValues(), - FirstEVI->getAggregateOperand()->getName() + ".pn"); - // And populate the PHI with said values. 
- for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) - NewAggregateOperand->addIncoming( - cast<ExtractValueInst>(std::get<1>(Incoming))->getAggregateOperand(), - std::get<0>(Incoming)); - InsertNewInstBefore(NewAggregateOperand, PN); - - // And finally, create `extractvalue` over the newly-formed PHI nodes. - auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand, - FirstEVI->getIndices(), PN.getName()); - - PHIArgMergedDebugLoc(NewEVI, PN); - ++NumPHIsOfExtractValues; - return NewEVI; -} - +/// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)], +/// turn this into a phi[a,c] and phi[b,d] and a single insertvalue. +Instruction * +InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) { + auto *FirstIVI = cast<InsertValueInst>(PN.getIncomingValue(0)); + + // Scan to see if all operands are `insertvalue`'s with the same indicies, + // and all have a single use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i)); + if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices()) + return nullptr; + } + + // For each operand of an `insertvalue` + std::array<PHINode *, 2> NewOperands; + for (int OpIdx : {0, 1}) { + auto *&NewOperand = NewOperands[OpIdx]; + // Create a new PHI node to receive the values the operand has in each + // incoming basic block. + NewOperand = PHINode::Create( + FirstIVI->getOperand(OpIdx)->getType(), PN.getNumIncomingValues(), + FirstIVI->getOperand(OpIdx)->getName() + ".pn"); + // And populate each operand's PHI with said values. + for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) + NewOperand->addIncoming( + cast<InsertValueInst>(std::get<1>(Incoming))->getOperand(OpIdx), + std::get<0>(Incoming)); + InsertNewInstBefore(NewOperand, PN); + } + + // And finally, create `insertvalue` over the newly-formed PHI nodes. + auto *NewIVI = InsertValueInst::Create(NewOperands[0], NewOperands[1], + FirstIVI->getIndices(), PN.getName()); + + PHIArgMergedDebugLoc(NewIVI, PN); + ++NumPHIsOfInsertValues; + return NewIVI; +} + +/// If we have something like phi [extractvalue(a,0), extractvalue(b,0)], +/// turn this into a phi[a,b] and a single extractvalue. +Instruction * +InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) { + auto *FirstEVI = cast<ExtractValueInst>(PN.getIncomingValue(0)); + + // Scan to see if all operands are `extractvalue`'s with the same indicies, + // and all have a single use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i)); + if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() || + I->getAggregateOperand()->getType() != + FirstEVI->getAggregateOperand()->getType()) + return nullptr; + } + + // Create a new PHI node to receive the values the aggregate operand has + // in each incoming basic block. + auto *NewAggregateOperand = PHINode::Create( + FirstEVI->getAggregateOperand()->getType(), PN.getNumIncomingValues(), + FirstEVI->getAggregateOperand()->getName() + ".pn"); + // And populate the PHI with said values. + for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) + NewAggregateOperand->addIncoming( + cast<ExtractValueInst>(std::get<1>(Incoming))->getAggregateOperand(), + std::get<0>(Incoming)); + InsertNewInstBefore(NewAggregateOperand, PN); + + // And finally, create `extractvalue` over the newly-formed PHI nodes. 
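Illustrative sketch (not from the patch; names invented) of the source-level shape that phi-of-binop folding above targets: both incoming values apply the same operation, so the operation can be done once on a phi of the differing operands.

int pick(bool c, int a, int b, int d) {
  int r = c ? (a + b) : (a + d);
  // after the fold: int t = c ? b : d;  r = a + t;   (one add)
  return r;
}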
+ auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand, + FirstEVI->getIndices(), PN.getName()); + + PHIArgMergedDebugLoc(NewEVI, PN); + ++NumPHIsOfExtractValues; + return NewEVI; +} + /// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the -/// adds all have a single user, turn this into a phi and a single binop. -Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { +/// adds all have a single user, turn this into a phi and a single binop. +Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)); unsigned Opc = FirstInst->getOpcode(); @@ -388,10 +388,10 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { Type *LHSType = LHSVal->getType(); Type *RHSType = RHSVal->getType(); - // Scan to see if all operands are the same opcode, and all have one user. + // Scan to see if all operands are the same opcode, and all have one user. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); - if (!I || I->getOpcode() != Opc || !I->hasOneUser() || + if (!I || I->getOpcode() != Opc || !I->hasOneUser() || // Verify type of the LHS matches so we don't fold cmp's of different // types. I->getOperand(0)->getType() != LHSType || @@ -471,7 +471,7 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { return NewBinOp; } -Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { +Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), @@ -487,12 +487,12 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { bool AllInBounds = true; - // Scan to see if all operands are the same opcode, and all have one user. + // Scan to see if all operands are the same opcode, and all have one user. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); - if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() || - GEP->getNumOperands() != FirstInst->getNumOperands()) + GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); + if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() || + GEP->getNumOperands() != FirstInst->getNumOperands()) return nullptr; AllInBounds &= GEP->isInBounds(); @@ -592,14 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) - if (BBI->mayWriteToMemory()) { - // Calls that only access inaccessible memory do not block sinking the - // load. - if (auto *CB = dyn_cast<CallBase>(BBI)) - if (CB->onlyAccessesInaccessibleMemory()) - continue; + if (BBI->mayWriteToMemory()) { + // Calls that only access inaccessible memory do not block sinking the + // load. + if (auto *CB = dyn_cast<CallBase>(BBI)) + if (CB->onlyAccessesInaccessibleMemory()) + continue; return false; - } + } // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. 
@@ -632,7 +632,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { return true; } -Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { +Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); // FIXME: This is overconservative; this transform is allowed in some cases @@ -665,7 +665,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUser()) + if (!LI || !LI->hasOneUser()) return nullptr; // We can't sink the load if the loaded value could be modified between @@ -746,7 +746,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { /// TODO: This function could handle other cast types, but then it might /// require special-casing a cast from the 'i1' type. See the comment in /// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types. -Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { +Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { // We cannot create a new instruction after the PHI if the terminator is an // EHPad because there is no valid insertion point. if (Instruction *TI = Phi.getParent()->getTerminator()) @@ -778,8 +778,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { unsigned NumConsts = 0; for (Value *V : Phi.incoming_values()) { if (auto *Zext = dyn_cast<ZExtInst>(V)) { - // All zexts must be identical and have one user. - if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUser()) + // All zexts must be identical and have one user. + if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUser()) return nullptr; NewIncoming.push_back(Zext->getOperand(0)); NumZexts++; @@ -820,7 +820,7 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { /// If all operands to a PHI node are the same "unary" operator and they all are /// only used by the PHI, PHI together their inputs, and do the operation once, /// to the result of the PHI. -Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { +Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { // We cannot create a new instruction after the PHI if the terminator is an // EHPad because there is no valid insertion point. if (Instruction *TI = PN.getParent()->getTerminator()) @@ -830,13 +830,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); if (isa<GetElementPtrInst>(FirstInst)) - return foldPHIArgGEPIntoPHI(PN); + return foldPHIArgGEPIntoPHI(PN); if (isa<LoadInst>(FirstInst)) - return foldPHIArgLoadIntoPHI(PN); - if (isa<InsertValueInst>(FirstInst)) - return foldPHIArgInsertValueInstructionIntoPHI(PN); - if (isa<ExtractValueInst>(FirstInst)) - return foldPHIArgExtractValueInstructionIntoPHI(PN); + return foldPHIArgLoadIntoPHI(PN); + if (isa<InsertValueInst>(FirstInst)) + return foldPHIArgInsertValueInstructionIntoPHI(PN); + if (isa<ExtractValueInst>(FirstInst)) + return foldPHIArgExtractValueInstructionIntoPHI(PN); // Scan the instruction, looking for input operations that can be folded away. // If all input operands to the phi are the same instruction (e.g. a cast from @@ -859,7 +859,7 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { // otherwise call FoldPHIArgBinOpIntoPHI. 
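Illustrative sketch (not from the patch; names invented) of the phi-of-zext shape handled above: when every incoming value is a zero-extension from the same narrow type, the extension can be done once after the phi.

#include <cstdint>

uint32_t widen(bool c, uint8_t a, uint8_t b) {
  uint32_t r = c ? static_cast<uint32_t>(a) : static_cast<uint32_t>(b);
  // after the fold: uint8_t t = c ? a : b;  r = static_cast<uint32_t>(t);
  return r;
}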
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (!ConstantOp) - return foldPHIArgBinOpIntoPHI(PN); + return foldPHIArgBinOpIntoPHI(PN); } else { return nullptr; // Cannot fold this operation. } @@ -867,7 +867,7 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); - if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst)) + if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst)) return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) @@ -1019,7 +1019,7 @@ struct LoweredPHIRecord { LoweredPHIRecord(PHINode *pn, unsigned Sh) : PN(pn), Shift(Sh), Width(0) {} }; -} // namespace +} // namespace namespace llvm { template<> @@ -1040,7 +1040,7 @@ namespace llvm { LHS.Width == RHS.Width; } }; -} // namespace llvm +} // namespace llvm /// This is an integer PHI and we know that it has an illegal type: see if it is @@ -1051,7 +1051,7 @@ namespace llvm { /// TODO: The user of the trunc may be an bitcast to float/double/vector or an /// inttoptr. We should produce new PHIs in the right type. /// -Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { +Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHIUsers - Keep track of all of the truncated values extracted from a set // of PHIs, along with their offset. These are the things we want to rewrite. SmallVector<PHIUsageRecord, 16> PHIUsers; @@ -1225,85 +1225,85 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { return replaceInstUsesWith(FirstPhi, Undef); } -static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, - const DominatorTree &DT) { - // Simplify the following patterns: - // if (cond) - // / \ - // ... ... - // \ / - // phi [true] [false] - if (!PN.getType()->isIntegerTy(1)) - return nullptr; - - if (PN.getNumOperands() != 2) - return nullptr; - - // Make sure all inputs are constants. - if (!all_of(PN.operands(), [](Value *V) { return isa<ConstantInt>(V); })) - return nullptr; - - BasicBlock *BB = PN.getParent(); - // Do not bother with unreachable instructions. - if (!DT.isReachableFromEntry(BB)) - return nullptr; - - // Same inputs. - if (PN.getOperand(0) == PN.getOperand(1)) - return PN.getOperand(0); - - BasicBlock *TruePred = nullptr, *FalsePred = nullptr; - for (auto *Pred : predecessors(BB)) { - auto *Input = cast<ConstantInt>(PN.getIncomingValueForBlock(Pred)); - if (Input->isAllOnesValue()) - TruePred = Pred; - else - FalsePred = Pred; - } - assert(TruePred && FalsePred && "Must be!"); - - // Check which edge of the dominator dominates the true input. If it is the - // false edge, we should invert the condition. - auto *IDom = DT.getNode(BB)->getIDom()->getBlock(); - auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); - if (!BI || BI->isUnconditional()) - return nullptr; - - // Check that edges outgoing from the idom's terminators dominate respective - // inputs of the Phi. - BasicBlockEdge TrueOutEdge(IDom, BI->getSuccessor(0)); - BasicBlockEdge FalseOutEdge(IDom, BI->getSuccessor(1)); - - BasicBlockEdge TrueIncEdge(TruePred, BB); - BasicBlockEdge FalseIncEdge(FalsePred, BB); - - auto *Cond = BI->getCondition(); - if (DT.dominates(TrueOutEdge, TrueIncEdge) && - DT.dominates(FalseOutEdge, FalseIncEdge)) - // This Phi is actually equivalent to branching condition of IDom. 
- return Cond; - else if (DT.dominates(TrueOutEdge, FalseIncEdge) && - DT.dominates(FalseOutEdge, TrueIncEdge)) { - // This Phi is actually opposite to branching condition of IDom. We invert - // the condition that will potentially open up some opportunities for - // sinking. - auto InsertPt = BB->getFirstInsertionPt(); - if (InsertPt != BB->end()) { - Self.Builder.SetInsertPoint(&*InsertPt); - return Self.Builder.CreateNot(Cond); - } - } - - return nullptr; -} - +static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, + const DominatorTree &DT) { + // Simplify the following patterns: + // if (cond) + // / \ + // ... ... + // \ / + // phi [true] [false] + if (!PN.getType()->isIntegerTy(1)) + return nullptr; + + if (PN.getNumOperands() != 2) + return nullptr; + + // Make sure all inputs are constants. + if (!all_of(PN.operands(), [](Value *V) { return isa<ConstantInt>(V); })) + return nullptr; + + BasicBlock *BB = PN.getParent(); + // Do not bother with unreachable instructions. + if (!DT.isReachableFromEntry(BB)) + return nullptr; + + // Same inputs. + if (PN.getOperand(0) == PN.getOperand(1)) + return PN.getOperand(0); + + BasicBlock *TruePred = nullptr, *FalsePred = nullptr; + for (auto *Pred : predecessors(BB)) { + auto *Input = cast<ConstantInt>(PN.getIncomingValueForBlock(Pred)); + if (Input->isAllOnesValue()) + TruePred = Pred; + else + FalsePred = Pred; + } + assert(TruePred && FalsePred && "Must be!"); + + // Check which edge of the dominator dominates the true input. If it is the + // false edge, we should invert the condition. + auto *IDom = DT.getNode(BB)->getIDom()->getBlock(); + auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); + if (!BI || BI->isUnconditional()) + return nullptr; + + // Check that edges outgoing from the idom's terminators dominate respective + // inputs of the Phi. + BasicBlockEdge TrueOutEdge(IDom, BI->getSuccessor(0)); + BasicBlockEdge FalseOutEdge(IDom, BI->getSuccessor(1)); + + BasicBlockEdge TrueIncEdge(TruePred, BB); + BasicBlockEdge FalseIncEdge(FalsePred, BB); + + auto *Cond = BI->getCondition(); + if (DT.dominates(TrueOutEdge, TrueIncEdge) && + DT.dominates(FalseOutEdge, FalseIncEdge)) + // This Phi is actually equivalent to branching condition of IDom. + return Cond; + else if (DT.dominates(TrueOutEdge, FalseIncEdge) && + DT.dominates(FalseOutEdge, TrueIncEdge)) { + // This Phi is actually opposite to branching condition of IDom. We invert + // the condition that will potentially open up some opportunities for + // sinking. 
+ auto InsertPt = BB->getFirstInsertionPt(); + if (InsertPt != BB->end()) { + Self.Builder.SetInsertPoint(&*InsertPt); + return Self.Builder.CreateNot(Cond); + } + } + + return nullptr; +} + // PHINode simplification // -Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { +Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN))) return replaceInstUsesWith(PN, V); - if (Instruction *Result = foldPHIArgZextsIntoPHI(PN)) + if (Instruction *Result = foldPHIArgZextsIntoPHI(PN)) return Result; // If all PHI operands are the same operation, pull them through the PHI, @@ -1311,16 +1311,16 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (isa<Instruction>(PN.getIncomingValue(0)) && isa<Instruction>(PN.getIncomingValue(1)) && cast<Instruction>(PN.getIncomingValue(0))->getOpcode() == - cast<Instruction>(PN.getIncomingValue(1))->getOpcode() && - PN.getIncomingValue(0)->hasOneUser()) - if (Instruction *Result = foldPHIArgOpIntoPHI(PN)) + cast<Instruction>(PN.getIncomingValue(1))->getOpcode() && + PN.getIncomingValue(0)->hasOneUser()) + if (Instruction *Result = foldPHIArgOpIntoPHI(PN)) return Result; // If this is a trivial cycle in the PHI node graph, remove it. Basically, if // this PHI only has a single use (a PHI), and if that PHI only has one use (a // PHI)... break the cycle. if (PN.hasOneUse()) { - if (Instruction *Result = foldIntegerTypedPHI(PN)) + if (Instruction *Result = foldIntegerTypedPHI(PN)) return Result; Instruction *PHIUser = cast<Instruction>(PN.user_back()); @@ -1433,21 +1433,21 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { } } - // Is there an identical PHI node in this basic block? - for (PHINode &IdenticalPN : PN.getParent()->phis()) { - // Ignore the PHI node itself. - if (&IdenticalPN == &PN) - continue; - // Note that even though we've just canonicalized this PHI, due to the - // worklist visitation order, there are no guarantess that *every* PHI - // has been canonicalized, so we can't just compare operands ranges. - if (!PN.isIdenticalToWhenDefined(&IdenticalPN)) - continue; - // Just use that PHI instead then. - ++NumPHICSEs; - return replaceInstUsesWith(PN, &IdenticalPN); - } - + // Is there an identical PHI node in this basic block? + for (PHINode &IdenticalPN : PN.getParent()->phis()) { + // Ignore the PHI node itself. + if (&IdenticalPN == &PN) + continue; + // Note that even though we've just canonicalized this PHI, due to the + // worklist visitation order, there are no guarantess that *every* PHI + // has been canonicalized, so we can't just compare operands ranges. + if (!PN.isIdenticalToWhenDefined(&IdenticalPN)) + continue; + // Just use that PHI instead then. + ++NumPHICSEs; + return replaceInstUsesWith(PN, &IdenticalPN); + } + // If this is an integer PHI and we know that it has an illegal type, see if // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is @@ -1457,9 +1457,9 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - // Ultimately, try to replace this Phi with a dominating condition. - if (auto *V = SimplifyUsingControlFlow(*this, PN, DT)) - return replaceInstUsesWith(PN, V); - + // Ultimately, try to replace this Phi with a dominating condition. 
+ if (auto *V = SimplifyUsingControlFlow(*this, PN, DT)) + return replaceInstUsesWith(PN, V); + return nullptr; } diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5f174aae09..af4f67dee0 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -38,7 +38,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include <cassert> #include <utility> @@ -47,11 +47,11 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// FIXME: Enabled by default until the pattern is supported well. -static cl::opt<bool> EnableUnsafeSelectTransform( - "instcombine-unsafe-select-transform", cl::init(true), - cl::desc("Enable poison-unsafe select to and/or transform")); - +/// FIXME: Enabled by default until the pattern is supported well. +static cl::opt<bool> EnableUnsafeSelectTransform( + "instcombine-unsafe-select-transform", cl::init(true), + cl::desc("Enable poison-unsafe select to and/or transform")); + static Value *createMinMax(InstCombiner::BuilderTy &Builder, SelectPatternFlavor SPF, Value *A, Value *B) { CmpInst::Predicate Pred = getMinMaxPred(SPF); @@ -63,7 +63,7 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder, /// constant of a binop. static Instruction *foldSelectBinOpIdentity(SelectInst &Sel, const TargetLibraryInfo &TLI, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { // The select condition must be an equality compare with a constant operand. Value *X; Constant *C; @@ -265,8 +265,8 @@ static unsigned getSelectFoldableOperands(BinaryOperator *I) { } /// We have (select c, TI, FI), and we know that TI and FI have the same opcode. -Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI) { +Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI) { // Don't break up min/max patterns. The hasOneUse checks below prevent that // for most cases, but vector min/max with bitcasts can be transformed. If the // one-use restrictions are eased for other patterns, we still don't want to @@ -288,9 +288,9 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). if (auto *CondVTy = dyn_cast<VectorType>(CondTy)) { - if (!FIOpndTy->isVectorTy() || - CondVTy->getElementCount() != - cast<VectorType>(FIOpndTy)->getElementCount()) + if (!FIOpndTy->isVectorTy() || + CondVTy->getElementCount() != + cast<VectorType>(FIOpndTy)->getElementCount()) return nullptr; // TODO: If the backend knew how to deal with casts better, we could @@ -403,8 +403,8 @@ static bool isSelect01(const APInt &C1I, const APInt &C2I) { /// Try to fold the select into one of the operands to allow further /// optimization. -Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, - Value *FalseVal) { +Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, + Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the // transformation we are doing here. 
if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) { @@ -418,15 +418,15 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, } if (OpToFold) { - Constant *C = ConstantExpr::getBinOpIdentity(TVI->getOpcode(), - TVI->getType(), true); + Constant *C = ConstantExpr::getBinOpIdentity(TVI->getOpcode(), + TVI->getType(), true); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. const APInt *OOpC; bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); - if (!isa<Constant>(OOp) || - (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { + if (!isa<Constant>(OOp) || + (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), @@ -450,15 +450,15 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, } if (OpToFold) { - Constant *C = ConstantExpr::getBinOpIdentity(FVI->getOpcode(), - FVI->getType(), true); + Constant *C = ConstantExpr::getBinOpIdentity(FVI->getOpcode(), + FVI->getType(), true); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. const APInt *OOpC; bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); - if (!isa<Constant>(OOp) || - (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { + if (!isa<Constant>(OOp) || + (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), @@ -1013,9 +1013,9 @@ static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) { /// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2 /// Note: if C1 != C2, this will change the icmp constant to the existing /// constant operand of the select. -static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel, - ICmpInst &Cmp, - InstCombinerImpl &IC) { +static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel, + ICmpInst &Cmp, + InstCombinerImpl &IC) { if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1))) return nullptr; @@ -1053,7 +1053,7 @@ static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel, } static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1))) return nullptr; @@ -1063,18 +1063,18 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, SPF != SelectPatternFlavor::SPF_NABS) return nullptr; - // Note that NSW flag can only be propagated for normal, non-negated abs! - bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && - match(RHS, m_NSWNeg(m_Specific(LHS))); - Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); - Instruction *Abs = - IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); + // Note that NSW flag can only be propagated for normal, non-negated abs! 
+ bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && + match(RHS, m_NSWNeg(m_Specific(LHS))); + Constant *IntMinIsPoisonC = + ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + Instruction *Abs = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); - if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + if (SPF == SelectPatternFlavor::SPF_NABS) + return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! - return IC.replaceInstUsesWith(Sel, Abs); + return IC.replaceInstUsesWith(Sel, Abs); } /// If we have a select with an equality comparison, then we know the value in @@ -1093,56 +1093,56 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// /// We can't replace %sel with %add unless we strip away the flags. /// TODO: Wrapping flags could be preserved in some cases with better analysis. -Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, - ICmpInst &Cmp) { - // Value equivalence substitution requires an all-or-nothing replacement. - // It does not make sense for a vector compare where each lane is chosen - // independently. - if (!Cmp.isEquality() || Cmp.getType()->isVectorTy()) +Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, + ICmpInst &Cmp) { + // Value equivalence substitution requires an all-or-nothing replacement. + // It does not make sense for a vector compare where each lane is chosen + // independently. + if (!Cmp.isEquality() || Cmp.getType()->isVectorTy()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); - bool Swapped = false; - if (Cmp.getPredicate() == ICmpInst::ICMP_NE) { + bool Swapped = false; + if (Cmp.getPredicate() == ICmpInst::ICMP_NE) { std::swap(TrueVal, FalseVal); - Swapped = true; - } - - // In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand. - // Make sure Y cannot be undef though, as we might pick different values for - // undef in the icmp and in f(Y). Additionally, take care to avoid replacing - // X == Y ? X : Z with X == Y ? Y : Z, as that would lead to an infinite - // replacement cycle. - Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); - if (TrueVal != CmpLHS && - isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) { - if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ, - /* AllowRefinement */ true)) - return replaceOperand(Sel, Swapped ? 2 : 1, V); - - // Even if TrueVal does not simplify, we can directly replace a use of - // CmpLHS with CmpRHS, as long as the instruction is not used anywhere - // else and is safe to speculatively execute (we may end up executing it - // with different operands, which should not cause side-effects or trigger - // undefined behavior). Only do this if CmpRHS is a constant, as - // profitability is not clear for other cases. - // FIXME: The replacement could be performed recursively. 
- if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant())) - if (auto *I = dyn_cast<Instruction>(TrueVal)) - if (I->hasOneUse() && isSafeToSpeculativelyExecute(I)) - for (Use &U : I->operands()) - if (U == CmpLHS) { - replaceUse(U, CmpRHS); - return &Sel; - } - } - if (TrueVal != CmpRHS && - isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT)) - if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ, - /* AllowRefinement */ true)) - return replaceOperand(Sel, Swapped ? 2 : 1, V); - + Swapped = true; + } + + // In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand. + // Make sure Y cannot be undef though, as we might pick different values for + // undef in the icmp and in f(Y). Additionally, take care to avoid replacing + // X == Y ? X : Z with X == Y ? Y : Z, as that would lead to an infinite + // replacement cycle. + Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); + if (TrueVal != CmpLHS && + isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) { + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ, + /* AllowRefinement */ true)) + return replaceOperand(Sel, Swapped ? 2 : 1, V); + + // Even if TrueVal does not simplify, we can directly replace a use of + // CmpLHS with CmpRHS, as long as the instruction is not used anywhere + // else and is safe to speculatively execute (we may end up executing it + // with different operands, which should not cause side-effects or trigger + // undefined behavior). Only do this if CmpRHS is a constant, as + // profitability is not clear for other cases. + // FIXME: The replacement could be performed recursively. + if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant())) + if (auto *I = dyn_cast<Instruction>(TrueVal)) + if (I->hasOneUse() && isSafeToSpeculativelyExecute(I)) + for (Use &U : I->operands()) + if (U == CmpLHS) { + replaceUse(U, CmpRHS); + return &Sel; + } + } + if (TrueVal != CmpRHS && + isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT)) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ, + /* AllowRefinement */ true)) + return replaceOperand(Sel, Swapped ? 2 : 1, V); + auto *FalseInst = dyn_cast<Instruction>(FalseVal); if (!FalseInst) return nullptr; @@ -1150,7 +1150,7 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, // InstSimplify already performed this fold if it was possible subject to // current poison-generating flags. Try the transform again with // poison-generating flags temporarily dropped. - bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false; + bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false; if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) { WasNUW = OBO->hasNoUnsignedWrap(); WasNSW = OBO->hasNoSignedWrap(); @@ -1161,20 +1161,20 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, WasExact = PEO->isExact(); FalseInst->setIsExact(false); } - if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) { - WasInBounds = GEP->isInBounds(); - GEP->setIsInBounds(false); - } + if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) { + WasInBounds = GEP->isInBounds(); + GEP->setIsInBounds(false); + } // Try each equivalence substitution possibility. // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? 
(X + 1) : (X + 1) --> X + 1 - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ, + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ, /* AllowRefinement */ false) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ, + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ, /* AllowRefinement */ false) == TrueVal) { - return replaceInstUsesWith(Sel, FalseVal); + return replaceInstUsesWith(Sel, FalseVal); } // Restore poison-generating flags if the transform did not apply. @@ -1184,8 +1184,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, FalseInst->setHasNoSignedWrap(); if (WasExact) FalseInst->setIsExact(); - if (WasInBounds) - cast<GetElementPtrInst>(FalseInst)->setIsInBounds(); + if (WasInBounds) + cast<GetElementPtrInst>(FalseInst)->setIsInBounds(); return nullptr; } @@ -1236,7 +1236,7 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, APInt::getAllOnesValue( C0->getType()->getScalarSizeInBits())))) return nullptr; // Can't do, have all-ones element[s]. - C0 = InstCombiner::AddOne(C0); + C0 = InstCombiner::AddOne(C0); std::swap(X, Sel1); break; case ICmpInst::Predicate::ICMP_UGE: @@ -1296,7 +1296,7 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, APInt::getSignedMaxValue( C2->getType()->getScalarSizeInBits())))) return nullptr; // Can't do, have signed max element[s]. - C2 = InstCombiner::AddOne(C2); + C2 = InstCombiner::AddOne(C2); LLVM_FALLTHROUGH; case ICmpInst::Predicate::ICMP_SGE: // Also non-canonical, but here we don't need to change C2, @@ -1343,7 +1343,7 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, // and swap the hands of select. static Instruction * tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { ICmpInst::Predicate Pred; Value *X; Constant *C0; @@ -1358,7 +1358,7 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, // If comparison predicate is non-canonical, then we certainly won't be able // to make it canonical; canonicalizeCmpWithConstant() already tried. - if (!InstCombiner::isCanonicalPredicate(Pred)) + if (!InstCombiner::isCanonicalPredicate(Pred)) return nullptr; // If the [input] type of comparison and select type are different, lets abort @@ -1386,8 +1386,8 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, return nullptr; // Check the constant we'd have with flipped-strictness predicate. - auto FlippedStrictness = - InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, C0); + auto FlippedStrictness = + InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, C0); if (!FlippedStrictness) return nullptr; @@ -1410,10 +1410,10 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, } /// Visit a SelectInst that has an ICmpInst as its first operand. -Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, - ICmpInst *ICI) { - if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) - return NewSel; +Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, + ICmpInst *ICI) { + if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) + return NewSel; if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) return NewSel; @@ -1563,11 +1563,11 @@ static bool canSelectOperandBeMappingIntoPredBlock(const Value *V, /// We have an SPF (e.g. 
a min or max) of an SPF of the form: /// SPF2(SPF1(A, B), C) -Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner, - SelectPatternFlavor SPF1, Value *A, - Value *B, Instruction &Outer, - SelectPatternFlavor SPF2, - Value *C) { +Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner, + SelectPatternFlavor SPF1, Value *A, + Value *B, Instruction &Outer, + SelectPatternFlavor SPF2, + Value *C) { if (Outer.getType() != Inner->getType()) return nullptr; @@ -1884,7 +1884,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { return CallInst::Create(F, {X, Y}); } -Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { +Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { Constant *C; if (!match(Sel.getTrueValue(), m_Constant(C)) && !match(Sel.getFalseValue(), m_Constant(C))) @@ -1950,11 +1950,11 @@ Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { Value *CondVal = SI.getCondition(); Constant *CondC; - auto *CondValTy = dyn_cast<FixedVectorType>(CondVal->getType()); - if (!CondValTy || !match(CondVal, m_Constant(CondC))) + auto *CondValTy = dyn_cast<FixedVectorType>(CondVal->getType()); + if (!CondValTy || !match(CondVal, m_Constant(CondC))) return nullptr; - unsigned NumElts = CondValTy->getNumElements(); + unsigned NumElts = CondValTy->getNumElements(); SmallVector<int, 16> Mask; Mask.reserve(NumElts); for (unsigned i = 0; i != NumElts; ++i) { @@ -1986,8 +1986,8 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { /// to a vector select by splatting the condition. A splat may get folded with /// other operations in IR and having all operands of a select be vector types /// is likely better for vector codegen. -static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel, - InstCombinerImpl &IC) { +static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel, + InstCombinerImpl &IC) { auto *Ty = dyn_cast<VectorType>(Sel.getType()); if (!Ty) return nullptr; @@ -2000,8 +2000,8 @@ static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel, // select (extelt V, Index), T, F --> select (splat V, Index), T, F // Splatting the extracted condition reduces code (we could directly create a // splat shuffle of the source vector to eliminate the intermediate step). - return IC.replaceOperand( - Sel, 0, IC.Builder.CreateVectorSplat(Ty->getElementCount(), Cond)); + return IC.replaceOperand( + Sel, 0, IC.Builder.CreateVectorSplat(Ty->getElementCount(), Cond)); } /// Reuse bitcasted operands between a compare and select: @@ -2157,7 +2157,7 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, } /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. -Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) { +Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) { Type *Ty = MinMax1.getType(); // We are looking for a tree of: @@ -2278,42 +2278,42 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp); } -/// Try to reduce a funnel/rotate pattern that includes a compare and select -/// into a funnel shift intrinsic. Example: +/// Try to reduce a funnel/rotate pattern that includes a compare and select +/// into a funnel shift intrinsic. Example: /// rotl32(a, b) --> (b == 0 ? 
a : ((a >> (32 - b)) | (a << b))) /// --> call llvm.fshl.i32(a, a, b) -/// fshl32(a, b, c) --> (c == 0 ? a : ((b >> (32 - c)) | (a << c))) -/// --> call llvm.fshl.i32(a, b, c) -/// fshr32(a, b, c) --> (c == 0 ? b : ((a >> (32 - c)) | (b << c))) -/// --> call llvm.fshr.i32(a, b, c) -static Instruction *foldSelectFunnelShift(SelectInst &Sel, - InstCombiner::BuilderTy &Builder) { - // This must be a power-of-2 type for a bitmasking transform to be valid. - unsigned Width = Sel.getType()->getScalarSizeInBits(); - if (!isPowerOf2_32(Width)) - return nullptr; - - BinaryOperator *Or0, *Or1; - if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1))))) - return nullptr; - - Value *SV0, *SV1, *SA0, *SA1; - if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(SV0), - m_ZExtOrSelf(m_Value(SA0))))) || - !match(Or1, m_OneUse(m_LogicalShift(m_Value(SV1), - m_ZExtOrSelf(m_Value(SA1))))) || - Or0->getOpcode() == Or1->getOpcode()) - return nullptr; - - // Canonicalize to or(shl(SV0, SA0), lshr(SV1, SA1)). - if (Or0->getOpcode() == BinaryOperator::LShr) { - std::swap(Or0, Or1); - std::swap(SV0, SV1); - std::swap(SA0, SA1); - } - assert(Or0->getOpcode() == BinaryOperator::Shl && - Or1->getOpcode() == BinaryOperator::LShr && - "Illegal or(shift,shift) pair"); +/// fshl32(a, b, c) --> (c == 0 ? a : ((b >> (32 - c)) | (a << c))) +/// --> call llvm.fshl.i32(a, b, c) +/// fshr32(a, b, c) --> (c == 0 ? b : ((a >> (32 - c)) | (b << c))) +/// --> call llvm.fshr.i32(a, b, c) +static Instruction *foldSelectFunnelShift(SelectInst &Sel, + InstCombiner::BuilderTy &Builder) { + // This must be a power-of-2 type for a bitmasking transform to be valid. + unsigned Width = Sel.getType()->getScalarSizeInBits(); + if (!isPowerOf2_32(Width)) + return nullptr; + + BinaryOperator *Or0, *Or1; + if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1))))) + return nullptr; + + Value *SV0, *SV1, *SA0, *SA1; + if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(SV0), + m_ZExtOrSelf(m_Value(SA0))))) || + !match(Or1, m_OneUse(m_LogicalShift(m_Value(SV1), + m_ZExtOrSelf(m_Value(SA1))))) || + Or0->getOpcode() == Or1->getOpcode()) + return nullptr; + + // Canonicalize to or(shl(SV0, SA0), lshr(SV1, SA1)). + if (Or0->getOpcode() == BinaryOperator::LShr) { + std::swap(Or0, Or1); + std::swap(SV0, SV1); + std::swap(SA0, SA1); + } + assert(Or0->getOpcode() == BinaryOperator::Shl && + Or1->getOpcode() == BinaryOperator::LShr && + "Illegal or(shift,shift) pair"); // Check the shift amounts to see if they are an opposite pair. Value *ShAmt; @@ -2324,15 +2324,15 @@ static Instruction *foldSelectFunnelShift(SelectInst &Sel, else return nullptr; - // We should now have this pattern: - // select ?, TVal, (or (shl SV0, SA0), (lshr SV1, SA1)) - // The false value of the select must be a funnel-shift of the true value: - // IsFShl -> TVal must be SV0 else TVal must be SV1. - bool IsFshl = (ShAmt == SA0); - Value *TVal = Sel.getTrueValue(); - if ((IsFshl && TVal != SV0) || (!IsFshl && TVal != SV1)) - return nullptr; - + // We should now have this pattern: + // select ?, TVal, (or (shl SV0, SA0), (lshr SV1, SA1)) + // The false value of the select must be a funnel-shift of the true value: + // IsFShl -> TVal must be SV0 else TVal must be SV1. + bool IsFshl = (ShAmt == SA0); + Value *TVal = Sel.getTrueValue(); + if ((IsFshl && TVal != SV0) || (!IsFshl && TVal != SV1)) + return nullptr; + // Finally, see if the select is filtering out a shift-by-zero. 
Value *Cond = Sel.getCondition(); ICmpInst::Predicate Pred; @@ -2340,21 +2340,21 @@ static Instruction *foldSelectFunnelShift(SelectInst &Sel, Pred != ICmpInst::ICMP_EQ) return nullptr; - // If this is not a rotate then the select was blocking poison from the - // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it. - if (SV0 != SV1) { - if (IsFshl && !llvm::isGuaranteedNotToBePoison(SV1)) - SV1 = Builder.CreateFreeze(SV1); - else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(SV0)) - SV0 = Builder.CreateFreeze(SV0); - } - - // This is a funnel/rotate that avoids shift-by-bitwidth UB in a suboptimal way. + // If this is not a rotate then the select was blocking poison from the + // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it. + if (SV0 != SV1) { + if (IsFshl && !llvm::isGuaranteedNotToBePoison(SV1)) + SV1 = Builder.CreateFreeze(SV1); + else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(SV0)) + SV0 = Builder.CreateFreeze(SV0); + } + + // This is a funnel/rotate that avoids shift-by-bitwidth UB in a suboptimal way. // Convert to funnel shift intrinsic. Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType()); - ShAmt = Builder.CreateZExt(ShAmt, Sel.getType()); - return IntrinsicInst::Create(F, { SV0, SV1, ShAmt }); + ShAmt = Builder.CreateZExt(ShAmt, Sel.getType()); + return IntrinsicInst::Create(F, { SV0, SV1, ShAmt }); } static Instruction *foldSelectToCopysign(SelectInst &Sel, @@ -2378,8 +2378,8 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, bool IsTrueIfSignSet; ICmpInst::Predicate Pred; if (!match(Cond, m_OneUse(m_ICmp(Pred, m_BitCast(m_Value(X)), m_APInt(C)))) || - !InstCombiner::isSignBitCheck(Pred, *C, IsTrueIfSignSet) || - X->getType() != SelType) + !InstCombiner::isSignBitCheck(Pred, *C, IsTrueIfSignSet) || + X->getType() != SelType) return nullptr; // If needed, negate the value that will be the sign argument of the copysign: @@ -2400,7 +2400,7 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, return CopySign; } -Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { +Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { auto *VecTy = dyn_cast<FixedVectorType>(Sel.getType()); if (!VecTy) return nullptr; @@ -2530,33 +2530,33 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, return nullptr; } -static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { - FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition()); - if (!FI) - return nullptr; - - Value *Cond = FI->getOperand(0); - Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); - - // select (freeze(x == y)), x, y --> y - // select (freeze(x != y)), x, y --> x - // The freeze should be only used by this select. Otherwise, remaining uses of - // the freeze can observe a contradictory value. - // c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1 - // a = select c, x, y ; - // f(a, c) ; f(poison, 1) cannot happen, but if a is folded - // ; to y, this can happen. - CmpInst::Predicate Pred; - if (FI->hasOneUse() && - match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) && - (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) { - return Pred == ICmpInst::ICMP_EQ ? 
FalseVal : TrueVal; - } - - return nullptr; -} - -Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { +static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { + FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition()); + if (!FI) + return nullptr; + + Value *Cond = FI->getOperand(0); + Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); + + // select (freeze(x == y)), x, y --> y + // select (freeze(x != y)), x, y --> x + // The freeze should be only used by this select. Otherwise, remaining uses of + // the freeze can observe a contradictory value. + // c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1 + // a = select c, x, y ; + // f(a, c) ; f(poison, 1) cannot happen, but if a is folded + // ; to y, this can happen. + CmpInst::Predicate Pred; + if (FI->hasOneUse() && + match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) && + (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) { + return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal; + } + + return nullptr; +} + +Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); @@ -2592,45 +2592,45 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (SelType->isIntOrIntVectorTy(1) && TrueVal->getType() == CondVal->getType()) { - if (match(TrueVal, m_One()) && - (EnableUnsafeSelectTransform || impliesPoison(FalseVal, CondVal))) { + if (match(TrueVal, m_One()) && + (EnableUnsafeSelectTransform || impliesPoison(FalseVal, CondVal))) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); } - if (match(FalseVal, m_Zero()) && - (EnableUnsafeSelectTransform || impliesPoison(TrueVal, CondVal))) { - // Change: A = select B, C, false --> A = and B, C - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } - - // select a, false, b -> select !a, b, false + if (match(FalseVal, m_Zero()) && + (EnableUnsafeSelectTransform || impliesPoison(TrueVal, CondVal))) { + // Change: A = select B, C, false --> A = and B, C + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } + + // select a, false, b -> select !a, b, false if (match(TrueVal, m_Zero())) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); - return SelectInst::Create(NotCond, FalseVal, - ConstantInt::getFalse(SelType)); + return SelectInst::Create(NotCond, FalseVal, + ConstantInt::getFalse(SelType)); } - // select a, b, true -> select !a, true, b + // select a, b, true -> select !a, true, b if (match(FalseVal, m_One())) { Value *NotCond = Builder.CreateNot(CondVal, "not." 
+ CondVal->getName()); - return SelectInst::Create(NotCond, ConstantInt::getTrue(SelType), - TrueVal); + return SelectInst::Create(NotCond, ConstantInt::getTrue(SelType), + TrueVal); } - // select a, a, b -> select a, true, b + // select a, a, b -> select a, true, b if (CondVal == TrueVal) - return replaceOperand(SI, 1, ConstantInt::getTrue(SelType)); - // select a, b, a -> select a, b, false + return replaceOperand(SI, 1, ConstantInt::getTrue(SelType)); + // select a, b, a -> select a, b, false if (CondVal == FalseVal) - return replaceOperand(SI, 2, ConstantInt::getFalse(SelType)); + return replaceOperand(SI, 2, ConstantInt::getFalse(SelType)); - // select a, !a, b -> select !a, b, false + // select a, !a, b -> select !a, b, false if (match(TrueVal, m_Not(m_Specific(CondVal)))) - return SelectInst::Create(TrueVal, FalseVal, - ConstantInt::getFalse(SelType)); - // select a, b, !a -> select !a, true, b + return SelectInst::Create(TrueVal, FalseVal, + ConstantInt::getFalse(SelType)); + // select a, b, !a -> select !a, true, b if (match(FalseVal, m_Not(m_Specific(CondVal)))) - return SelectInst::Create(FalseVal, ConstantInt::getTrue(SelType), - TrueVal); + return SelectInst::Create(FalseVal, ConstantInt::getTrue(SelType), + TrueVal); } // Selecting between two integer or vector splat integer constants? @@ -2639,10 +2639,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. - // - // Do not handle i1 TrueVal and FalseVal otherwise would result in - // zext/sext i1 to i1. - if (SelType->isIntOrIntVectorTy() && !SelType->isIntOrIntVectorTy(1) && + // + // Do not handle i1 TrueVal and FalseVal otherwise would result in + // zext/sext i1 to i1. + if (SelType->isIntOrIntVectorTy() && !SelType->isIntOrIntVectorTy(1) && CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) @@ -2889,9 +2889,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { return replaceOperand(SI, 1, TrueSI->getTrueValue()); } // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b) - // We choose this as normal form to enable folding on the And and - // shortening paths for the values (this helps getUnderlyingObjects() for - // example). + // We choose this as normal form to enable folding on the And and + // shortening paths for the values (this helps getUnderlyingObjects() for + // example). 
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); replaceOperand(SI, 0, And); @@ -2974,8 +2974,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { } Value *NotCond; - if (match(CondVal, m_Not(m_Value(NotCond))) && - !InstCombiner::shouldAvoidAbsorbingNotIntoSelect(SI)) { + if (match(CondVal, m_Not(m_Value(NotCond))) && + !InstCombiner::shouldAvoidAbsorbingNotIntoSelect(SI)) { replaceOperand(SI, 0, NotCond); SI.swapValues(); SI.swapProfMetadata(); @@ -3009,8 +3009,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI, *this)) return Select; - if (Instruction *Funnel = foldSelectFunnelShift(SI, Builder)) - return Funnel; + if (Instruction *Funnel = foldSelectFunnelShift(SI, Builder)) + return Funnel; if (Instruction *Copysign = foldSelectToCopysign(SI, Builder)) return Copysign; @@ -3018,8 +3018,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *PN = foldSelectToPhi(SI, DT, Builder)) return replaceInstUsesWith(SI, PN); - if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder)) - return replaceInstUsesWith(SI, Fr); - + if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder)) + return replaceInstUsesWith(SI, Fr); + return nullptr; } diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineShifts.cpp index 127bf80809..185c1d04ee 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -15,36 +15,36 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1, - Value *ShAmt1) { - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now.. - if (ShAmt0->getType() != ShAmt1->getType()) - return false; - - // As input, we have the following pattern: - // Sh0 (Sh1 X, Q), K - // We want to rewrite that as: - // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) - // While we know that originally (Q+K) would not overflow - // (because 2 * (N-1) u<= iN -1), we have looked past extensions of - // shift amounts. so it may now overflow in smaller bitwidth. - // To ensure that does not happen, we need to ensure that the total maximal - // shift amount is still representable in that smaller bit width. - unsigned MaximalPossibleTotalShiftAmount = - (Sh0->getType()->getScalarSizeInBits() - 1) + - (Sh1->getType()->getScalarSizeInBits() - 1); - APInt MaximalRepresentableShiftAmount = - APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); - return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount); -} - +bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1, + Value *ShAmt1) { + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now.. 
+ if (ShAmt0->getType() != ShAmt1->getType()) + return false; + + // As input, we have the following pattern: + // Sh0 (Sh1 X, Q), K + // We want to rewrite that as: + // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) + // While we know that originally (Q+K) would not overflow + // (because 2 * (N-1) u<= iN -1), we have looked past extensions of + // shift amounts. so it may now overflow in smaller bitwidth. + // To ensure that does not happen, we need to ensure that the total maximal + // shift amount is still representable in that smaller bit width. + unsigned MaximalPossibleTotalShiftAmount = + (Sh0->getType()->getScalarSizeInBits() - 1) + + (Sh1->getType()->getScalarSizeInBits() - 1); + APInt MaximalRepresentableShiftAmount = + APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); + return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount); +} + // Given pattern: // (x shiftopcode Q) shiftopcode K // we should rewrite it as @@ -56,7 +56,7 @@ bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1, // // AnalyzeForSignBitExtraction indicates that we will only analyze whether this // pattern has any 2 right-shifts that sum to 1 less than original bit width. -Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts( +Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts( BinaryOperator *Sh0, const SimplifyQuery &SQ, bool AnalyzeForSignBitExtraction) { // Look for a shift of some instruction, ignore zext of shift amount if any. @@ -81,8 +81,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts( if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) return nullptr; - // Verify that it would be safe to try to add those two shift amounts. - if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1)) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1)) return nullptr; // We are only looking for signbit extraction if we have two right shifts. @@ -226,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // Verify that it would be safe to try to add those two shift amounts. - if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, - MaskShAmt)) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (MaskShAmt+ShiftShAmt) ? @@ -258,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // Verify that it would be safe to try to add those two shift amounts. - if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, - MaskShAmt)) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (ShiftShAmt-MaskShAmt) ? 
@@ -334,8 +334,8 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I, if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse()) return nullptr; - Constant *C0, *C1; - if (!match(I.getOperand(1), m_Constant(C1))) + Constant *C0, *C1; + if (!match(I.getOperand(1), m_Constant(C1))) return nullptr; Instruction::BinaryOps ShiftOpcode = I.getOpcode(); @@ -346,12 +346,12 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I, // TODO: Remove the one-use check if the other logic operand (Y) is constant. Value *X, *Y; auto matchFirstShift = [&](Value *V) { - BinaryOperator *BO; - APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits()); - return match(V, m_BinOp(BO)) && BO->getOpcode() == ShiftOpcode && - match(V, m_OneUse(m_Shift(m_Value(X), m_Constant(C0)))) && - match(ConstantExpr::getAdd(C0, C1), - m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); + BinaryOperator *BO; + APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits()); + return match(V, m_BinOp(BO)) && BO->getOpcode() == ShiftOpcode && + match(V, m_OneUse(m_Shift(m_Value(X), m_Constant(C0)))) && + match(ConstantExpr::getAdd(C0, C1), + m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); }; // Logic ops are commutative, so check each operand for a match. @@ -363,13 +363,13 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I, return nullptr; // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) - Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1); + Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1); Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC); Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, I.getOperand(1)); return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2); } -Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { +Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); @@ -408,15 +408,15 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { return BinaryOperator::Create( I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A); - // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2. + // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2. // Because shifts by negative values (which could occur if A were negative) // are undefined. - if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Constant(C))) && - match(C, m_Power2())) { + if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Constant(C))) && + match(C, m_Power2())) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? - Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(I.getType(), 1)); - Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName()); + Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(I.getType(), 1)); + Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName()); return replaceOperand(I, 1, Rem); } @@ -429,8 +429,8 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { /// Return true if we can simplify two logical (either left or right) shifts /// that have constant shift amounts: OuterShift (InnerShift X, C1), C2. 
static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl, - Instruction *InnerShift, - InstCombinerImpl &IC, Instruction *CxtI) { + Instruction *InnerShift, + InstCombinerImpl &IC, Instruction *CxtI) { assert(InnerShift->isLogicalShift() && "Unexpected instruction type"); // We need constant scalar or constant splat shifts. @@ -481,7 +481,7 @@ static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl, /// where the client will ask if E can be computed shifted right by 64-bits. If /// this succeeds, getShiftedValue() will be called to produce the value. static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, - InstCombinerImpl &IC, Instruction *CxtI) { + InstCombinerImpl &IC, Instruction *CxtI) { // We can always evaluate constants shifted. if (isa<Constant>(V)) return true; @@ -592,7 +592,7 @@ static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt, /// When canEvaluateShifted() returns true for an expression, this function /// inserts the new computation that produces the shifted value. static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, - InstCombinerImpl &IC, const DataLayout &DL) { + InstCombinerImpl &IC, const DataLayout &DL) { // We can always evaluate constants shifted. if (Constant *C = dyn_cast<Constant>(V)) { if (isLeftShift) @@ -602,7 +602,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } Instruction *I = cast<Instruction>(V); - IC.addToWorklist(I); + IC.addToWorklist(I); switch (I->getOpcode()) { default: llvm_unreachable("Inconsistency with CanEvaluateShifted"); @@ -652,15 +652,15 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift, case Instruction::Or: case Instruction::And: return true; - case Instruction::Xor: - // Do not change a 'not' of logical shift because that would create a normal - // 'xor'. The 'not' is likely better for analysis, SCEV, and codegen. - return !(Shift.isLogicalShift() && match(BO, m_Not(m_Value()))); + case Instruction::Xor: + // Do not change a 'not' of logical shift because that would create a normal + // 'xor'. The 'not' is likely better for analysis, SCEV, and codegen. + return !(Shift.isLogicalShift() && match(BO, m_Not(m_Value()))); } } -Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, - BinaryOperator &I) { +Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, + BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; const APInt *Op1C; @@ -682,8 +682,8 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - Type *Ty = I.getType(); - unsigned TypeBits = Ty->getScalarSizeInBits(); + Type *Ty = I.getType(); + unsigned TypeBits = Ty->getScalarSizeInBits(); assert(!Op1C->uge(TypeBits) && "Shift over the type width should have been removed already"); @@ -691,20 +691,20 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, return FoldedShift; // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) - if (auto *TI = dyn_cast<TruncInst>(Op0)) { + if (auto *TI = dyn_cast<TruncInst>(Op0)) { // If 'shift2' is an ashr, we would have to get the sign bit into a funny // place. Don't try to do this transformation in this case. 
Also, we // require that the input operand is a shift-by-constant so that we have // confidence that the shifts will get folded together. We could do this // xform in more cases, but it is unlikely to be profitable. - const APInt *TrShiftAmt; - if (I.isLogicalShift() && - match(TI->getOperand(0), m_Shift(m_Value(), m_APInt(TrShiftAmt)))) { - auto *TrOp = cast<Instruction>(TI->getOperand(0)); - Type *SrcTy = TrOp->getType(); - + const APInt *TrShiftAmt; + if (I.isLogicalShift() && + match(TI->getOperand(0), m_Shift(m_Value(), m_APInt(TrShiftAmt)))) { + auto *TrOp = cast<Instruction>(TI->getOperand(0)); + Type *SrcTy = TrOp->getType(); + // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = ConstantExpr::getZExt(Op1, SrcTy); + Constant *ShAmt = ConstantExpr::getZExt(Op1, SrcTy); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); @@ -712,27 +712,27 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, // part of the register be zeros. Emulate this by inserting an AND to // clear the top bits as needed. This 'and' will usually be zapped by // other xforms later if dead. - unsigned SrcSize = SrcTy->getScalarSizeInBits(); - Constant *MaskV = - ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, TypeBits)); + unsigned SrcSize = SrcTy->getScalarSizeInBits(); + Constant *MaskV = + ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, TypeBits)); // The mask we constructed says what the trunc would do if occurring // between the shifts. We want to know the effect *after* the second // shift. We know that it is a logical shift by a constant, so adjust the // mask as appropriate. - MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt); + MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt); // shift1 & 0x00FF - Value *And = Builder.CreateAnd(NSh, MaskV, TI->getName()); + Value *And = Builder.CreateAnd(NSh, MaskV, TI->getName()); // Return the value truncated to the interesting size. 
- return new TruncInst(And, Ty); + return new TruncInst(And, Ty); } } if (Op0->hasOneUse()) { if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) - Value *V1; - const APInt *CC; + Value *V1; + const APInt *CC; switch (Op0BO->getOpcode()) { default: break; case Instruction::Add: @@ -751,21 +751,21 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, Op0BO->getOperand(1)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); - Constant *Mask = ConstantInt::get(Ty, Bits); + Constant *Mask = ConstantInt::get(Ty, Bits); return BinaryOperator::CreateAnd(X, Mask); } // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) Value *Op0BOOp1 = Op0BO->getOperand(1); if (isLeftShift && Op0BOOp1->hasOneUse() && - match(Op0BOOp1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), - m_APInt(CC)))) { - Value *YS = // (Y << C) - Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + match(Op0BOOp1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_APInt(CC)))) { + Value *YS = // (Y << C) + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder.CreateAnd( - V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1), - V1->getName() + ".mask"); + Value *XM = Builder.CreateAnd( + V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1), + V1->getName() + ".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } LLVM_FALLTHROUGH; @@ -783,21 +783,21 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, Op0BO->getOperand(0)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); - Constant *Mask = ConstantInt::get(Ty, Bits); + Constant *Mask = ConstantInt::get(Ty, Bits); return BinaryOperator::CreateAnd(X, Mask); } // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && match(Op0BO->getOperand(0), - m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), - m_APInt(CC)))) { + m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_APInt(CC)))) { Value *YS = // (Y << C) - Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder.CreateAnd( - V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1), - V1->getName() + ".mask"); + Value *XM = Builder.CreateAnd( + V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1), + V1->getName() + ".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -888,7 +888,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, return nullptr; } -Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { const SimplifyQuery Q = SQ.getWithInstruction(&I); if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), @@ -929,8 +929,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { } const APInt *ShOp1; - if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1)))) && - ShOp1->ult(BitWidth)) { + if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1)))) && + ShOp1->ult(BitWidth)) { unsigned ShrAmt = ShOp1->getZExtValue(); if (ShrAmt < ShAmt) { // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1) @@ -950,33 +950,33 @@ Instruction 
*InstCombinerImpl::visitShl(BinaryOperator &I) { } } - if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(ShOp1)))) && - ShOp1->ult(BitWidth)) { - unsigned ShrAmt = ShOp1->getZExtValue(); - if (ShrAmt < ShAmt) { - // If C1 < C2: (X >>? C1) << C2 --> X << (C2 - C1) & (-1 << C2) - Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt); - auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff); - NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); - Builder.Insert(NewShl); - APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)); - return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask)); - } - if (ShrAmt > ShAmt) { - // If C1 > C2: (X >>? C1) << C2 --> X >>? (C1 - C2) & (-1 << C2) - Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt); - auto *OldShr = cast<BinaryOperator>(Op0); - auto *NewShr = - BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff); - NewShr->setIsExact(OldShr->isExact()); - Builder.Insert(NewShr); - APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)); - return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask)); - } - } - - if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) { + if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(ShOp1)))) && + ShOp1->ult(BitWidth)) { + unsigned ShrAmt = ShOp1->getZExtValue(); + if (ShrAmt < ShAmt) { + // If C1 < C2: (X >>? C1) << C2 --> X << (C2 - C1) & (-1 << C2) + Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt); + auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + Builder.Insert(NewShl); + APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)); + return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask)); + } + if (ShrAmt > ShAmt) { + // If C1 > C2: (X >>? C1) << C2 --> X >>? (C1 - C2) & (-1 << C2) + Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt); + auto *OldShr = cast<BinaryOperator>(Op0); + auto *NewShr = + BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff); + NewShr->setIsExact(OldShr->isExact()); + Builder.Insert(NewShr); + APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)); + return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask)); + } + } + + if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. 
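The shl folds restored above rest on plain bit arithmetic. The following is a small self-contained check, a sketch over unsigned 32-bit values that covers only the lshr flavour of ">>?"; the exact and ashr variants are not exercised here:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0x12345678u, 0xFFFFFFFFu, 0x80000001u, 7u};
  for (uint32_t X : Samples) {
    for (uint32_t C1 = 0; C1 < 32; ++C1) {
      for (uint32_t C2 = 0; C2 < 32; ++C2) {
        uint32_t HighMask = 0xFFFFFFFFu << C2; // the "-1 << C2" mask
        // If C1 < C2: (X >> C1) << C2  ==  (X << (C2 - C1)) & (-1 << C2)
        if (C1 < C2)
          assert(((X >> C1) << C2) == ((X << (C2 - C1)) & HighMask));
        // If C1 > C2: (X >> C1) << C2  ==  (X >> (C1 - C2)) & (-1 << C2)
        if (C1 > C2)
          assert(((X >> C1) << C2) == ((X >> (C1 - C2)) & HighMask));
        // shl of shl simply adds the amounts while they stay in range.
        if (C1 + C2 < 32)
          assert(((X << C1) << C2) == (X << (C1 + C2)));
      }
    }
  }
  return 0;
}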
if (AmtSum < BitWidth) @@ -1035,7 +1035,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { return nullptr; } -Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1137,12 +1137,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } } - // lshr i32 (X -nsw Y), 31 --> zext (X < Y) - Value *Y; - if (ShAmt == BitWidth - 1 && - match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) - return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty); - + // lshr i32 (X -nsw Y), 31 --> zext (X < Y) + Value *Y; + if (ShAmt == BitWidth - 1 && + match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) + return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty); + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. @@ -1171,7 +1171,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } Instruction * -InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract( +InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract( BinaryOperator &OldAShr) { assert(OldAShr.getOpcode() == Instruction::AShr && "Must be called with arithmetic right-shift instruction only."); @@ -1239,7 +1239,7 @@ InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract( return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType()); } -Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { +Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); @@ -1305,12 +1305,12 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { return new SExtInst(NewSh, Ty); } - // ashr i32 (X -nsw Y), 31 --> sext (X < Y) - Value *Y; - if (ShAmt == BitWidth - 1 && - match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) - return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty); - + // ashr i32 (X -nsw Y), 31 --> sext (X < Y) + Value *Y; + if (ShAmt == BitWidth - 1 && + match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) + return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty); + // If the shifted-out value is known-zero, then this is an exact shift. 
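Both "X -nsw Y" sign-bit folds in this hunk reduce to the same observation: when the subtraction cannot overflow, its sign bit is exactly X < Y. Below is a hedged standalone check; the explicit no-overflow filter stands in for the nsw flag, and the ashr line assumes arithmetic right shift for signed values, which C++20 guarantees:

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Samples[] = {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX};
  for (int32_t X : Samples) {
    for (int32_t Y : Samples) {
      int64_t Wide = static_cast<int64_t>(X) - static_cast<int64_t>(Y);
      if (Wide < INT32_MIN || Wide > INT32_MAX)
        continue; // X - Y would overflow, i.e. no 'nsw' guarantee for this pair
      int32_t D = static_cast<int32_t>(Wide);
      // lshr i32 (X -nsw Y), 31  -->  zext (X < Y)
      assert((static_cast<uint32_t>(D) >> 31) == (X < Y ? 1u : 0u));
      // ashr i32 (X -nsw Y), 31  -->  sext (X < Y)
      assert((D >> 31) == (X < Y ? -1 : 0));
    }
  }
  return 0;
}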
if (!I.isExact() && MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) { diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 16efe86377..5380bf728c 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -52,7 +52,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if /// the instruction has any properties that allow us to simplify its operands. -bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { +bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); KnownBits Known(BitWidth); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); @@ -68,16 +68,16 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { /// This form of SimplifyDemandedBits simplifies the specified instruction /// operand if possible, updating it in place. It returns true if it made any /// change and false otherwise. -bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, - const APInt &DemandedMask, - KnownBits &Known, unsigned Depth) { +bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, + const APInt &DemandedMask, + KnownBits &Known, unsigned Depth) { Use &U = I->getOperandUse(OpNo); Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known, Depth, I); if (!NewVal) return false; if (Instruction* OpInst = dyn_cast<Instruction>(U)) salvageDebugInfo(*OpInst); - + replaceUse(U, NewVal); return true; } @@ -105,12 +105,12 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// operands based on the information about what bits are demanded. This returns /// some other non-null value if it found out that V is equal to another value /// in the context where the specified bits are demanded, but not for all users. -Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - KnownBits &Known, - unsigned Depth, - Instruction *CxtI) { +Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + KnownBits &Known, + unsigned Depth, + Instruction *CxtI) { assert(V != nullptr && "Null pointer of Value???"); - assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); + assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); Type *VTy = V->getType(); assert( @@ -127,12 +127,12 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask.isNullValue()) // Not demanding any bits from V. 
return UndefValue::get(VTy); - if (Depth == MaxAnalysisRecursionDepth) - return nullptr; - - if (isa<ScalableVectorType>(VTy)) + if (Depth == MaxAnalysisRecursionDepth) return nullptr; + if (isa<ScalableVectorType>(VTy)) + return nullptr; + Instruction *I = dyn_cast<Instruction>(V); if (!I) { computeKnownBits(V, Known, Depth, CxtI); @@ -259,44 +259,44 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return InsertNewInstWith(And, *I); } - // If the RHS is a constant, see if we can change it. Don't alter a -1 - // constant because that's a canonical 'not' op, and that is better for - // combining, SCEV, and codegen. - const APInt *C; - if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) { - if ((*C | ~DemandedMask).isAllOnesValue()) { - // Force bits to 1 to create a 'not' op. - I->setOperand(1, ConstantInt::getAllOnesValue(VTy)); - return I; - } - // If we can't turn this into a 'not', try to shrink the constant. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - } + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a canonical 'not' op, and that is better for + // combining, SCEV, and codegen. + const APInt *C; + if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) { + if ((*C | ~DemandedMask).isAllOnesValue()) { + // Force bits to 1 to create a 'not' op. + I->setOperand(1, ConstantInt::getAllOnesValue(VTy)); + return I; + } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + } // If our LHS is an 'and' and if it has one use, and if any of the bits we // are flipping are known to be set, then the xor is just resetting those // bits to zero. We can just knock out bits from the 'and' and the 'xor', // simplifying both of them. - if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) { - ConstantInt *AndRHS, *XorRHS; + if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) { + ConstantInt *AndRHS, *XorRHS; if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && - match(I->getOperand(1), m_ConstantInt(XorRHS)) && - match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) && + match(I->getOperand(1), m_ConstantInt(XorRHS)) && + match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) && (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); Constant *AndC = - ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); InsertNewInstWith(NewAnd, *I); Constant *XorC = - ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); return InsertNewInstWith(NewXor, *I); } - } + } break; } case Instruction::Select: { @@ -339,20 +339,20 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // we can. This helps not break apart (or helps put back together) // canonical patterns like min and max. auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo, - const APInt &DemandedMask) { + const APInt &DemandedMask) { const APInt *SelC; if (!match(I->getOperand(OpNo), m_APInt(SelC))) return false; // Get the constant out of the ICmp, if there is one. 
- // Only try this when exactly 1 operand is a constant (if both operands - // are constant, the icmp should eventually simplify). Otherwise, we may - // invert the transform that reduces set bits and infinite-loop. - Value *X; + // Only try this when exactly 1 operand is a constant (if both operands + // are constant, the icmp should eventually simplify). Otherwise, we may + // invert the transform that reduces set bits and infinite-loop. + Value *X; const APInt *CmpC; ICmpInst::Predicate Pred; - if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) || - isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth()) + if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) || + isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth()) return ShrinkDemandedConstant(I, OpNo, DemandedMask); // If the constant is already the same as the ICmp, leave it as-is. @@ -371,7 +371,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Only known if known in both the LHS and RHS. - Known = KnownBits::commonBits(LHSKnown, RHSKnown); + Known = KnownBits::commonBits(LHSKnown, RHSKnown); break; } case Instruction::ZExt: @@ -394,8 +394,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { - if (cast<FixedVectorType>(DstVTy)->getNumElements() != - cast<FixedVectorType>(SrcVTy)->getNumElements()) + if (cast<FixedVectorType>(DstVTy)->getNumElements() != + cast<FixedVectorType>(SrcVTy)->getNumElements()) // Don't touch a bitcast between vectors of different element counts. return nullptr; } else @@ -673,9 +673,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } break; } - case Instruction::SRem: { - ConstantInt *Rem; - if (match(I->getOperand(1), m_ConstantInt(Rem))) { + case Instruction::SRem: { + ConstantInt *Rem; + if (match(I->getOperand(1), m_ConstantInt(Rem))) { // X % -1 demands all the bits because we don't want to introduce // INT_MIN % -1 (== undef) by accident. if (Rem->isMinusOne()) @@ -718,7 +718,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.makeNonNegative(); } break; - } + } case Instruction::URem: { KnownBits Known2(BitWidth); APInt AllOnes = APInt::getAllOnesValue(BitWidth); @@ -789,12 +789,12 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBitsComputed = true; break; } - default: { - // Handle target specific intrinsics - Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( - *II, DemandedMask, Known, KnownBitsComputed); - if (V.hasValue()) - return V.getValue(); + default: { + // Handle target specific intrinsics + Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( + *II, DemandedMask, Known, KnownBitsComputed); + if (V.hasValue()) + return V.getValue(); break; } } @@ -816,9 +816,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, /// Helper routine of SimplifyDemandedUseBits. It computes Known /// bits. It also tries to handle simplifications that can be done based on /// DemandedMask, but without modifying the Instruction. 
-Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( - Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth, - Instruction *CxtI) { +Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( + Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth, + Instruction *CxtI) { unsigned BitWidth = DemandedMask.getBitWidth(); Type *ITy = I->getType(); @@ -903,33 +903,33 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( break; } - case Instruction::AShr: { - // Compute the Known bits to simplify things downstream. - computeKnownBits(I, Known, Depth, CxtI); - - // If this user is only demanding bits that we know, return the known - // constant. - if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) - return Constant::getIntegerValue(ITy, Known.One); - - // If the right shift operand 0 is a result of a left shift by the same - // amount, this is probably a zero/sign extension, which may be unnecessary, - // if we do not demand any of the new sign bits. So, return the original - // operand instead. - const APInt *ShiftRC; - const APInt *ShiftLC; - Value *X; - unsigned BitWidth = DemandedMask.getBitWidth(); - if (match(I, - m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) && - ShiftLC == ShiftRC && - DemandedMask.isSubsetOf(APInt::getLowBitsSet( - BitWidth, BitWidth - ShiftRC->getZExtValue()))) { - return X; - } - - break; - } + case Instruction::AShr: { + // Compute the Known bits to simplify things downstream. + computeKnownBits(I, Known, Depth, CxtI); + + // If this user is only demanding bits that we know, return the known + // constant. + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(ITy, Known.One); + + // If the right shift operand 0 is a result of a left shift by the same + // amount, this is probably a zero/sign extension, which may be unnecessary, + // if we do not demand any of the new sign bits. So, return the original + // operand instead. + const APInt *ShiftRC; + const APInt *ShiftLC; + Value *X; + unsigned BitWidth = DemandedMask.getBitWidth(); + if (match(I, + m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) && + ShiftLC == ShiftRC && + DemandedMask.isSubsetOf(APInt::getLowBitsSet( + BitWidth, BitWidth - ShiftRC->getZExtValue()))) { + return X; + } + + break; + } default: // Compute the Known bits to simplify things downstream. computeKnownBits(I, Known, Depth, CxtI); @@ -962,9 +962,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( /// /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was /// not successful. -Value *InstCombinerImpl::simplifyShrShlDemandedBits( - Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, - const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) { +Value *InstCombinerImpl::simplifyShrShlDemandedBits( + Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, + const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) { if (!ShlOp1 || !ShrOp1) return nullptr; // No-op. @@ -1025,8 +1025,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( } /// The specified value produces a vector with any number of elements. -/// This method analyzes which elements of the operand are undef or poison and -/// returns that information in UndefElts. +/// This method analyzes which elements of the operand are undef or poison and +/// returns that information in UndefElts. 
/// /// DemandedElts contains the set of elements that are actually used by the /// caller, and by default (AllowMultipleUsers equals false) the value is @@ -1037,11 +1037,11 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( /// If the information about demanded elements can be used to simplify the /// operation, the operation is simplified, then the resultant value is /// returned. This returns null if no change was made. -Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, - APInt DemandedElts, - APInt &UndefElts, - unsigned Depth, - bool AllowMultipleUsers) { +Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, + APInt DemandedElts, + APInt &UndefElts, + unsigned Depth, + bool AllowMultipleUsers) { // Cannot analyze scalable type. The number of vector elements is not a // compile-time constant. if (isa<ScalableVectorType>(V->getType())) @@ -1052,14 +1052,14 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); if (isa<UndefValue>(V)) { - // If the entire vector is undef or poison, just return this info. + // If the entire vector is undef or poison, just return this info. UndefElts = EltMask; return nullptr; } - if (DemandedElts.isNullValue()) { // If nothing is demanded, provide poison. + if (DemandedElts.isNullValue()) { // If nothing is demanded, provide poison. UndefElts = EltMask; - return PoisonValue::get(V->getType()); + return PoisonValue::get(V->getType()); } UndefElts = 0; @@ -1071,11 +1071,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return nullptr; Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Poison = PoisonValue::get(EltTy); + Constant *Poison = PoisonValue::get(EltTy); SmallVector<Constant*, 16> Elts; for (unsigned i = 0; i != VWidth; ++i) { - if (!DemandedElts[i]) { // If not demanded, set to poison. - Elts.push_back(Poison); + if (!DemandedElts[i]) { // If not demanded, set to poison. + Elts.push_back(Poison); UndefElts.setBit(i); continue; } @@ -1083,8 +1083,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, Constant *Elt = C->getAggregateElement(i); if (!Elt) return nullptr; - Elts.push_back(Elt); - if (isa<UndefValue>(Elt)) // Already undef or poison. + Elts.push_back(Elt); + if (isa<UndefValue>(Elt)) // Already undef or poison. UndefElts.setBit(i); } @@ -1145,12 +1145,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, }; if (mayIndexStructType(cast<GetElementPtrInst>(*I))) break; - + // Conservatively track the demanded elements back through any vector // operands we may have. We know there must be at least one, or we // wouldn't have a vector result to get here. Note that we intentionally // merge the undef bits here since gepping with either an undef base or - // index results in undef. + // index results in undef. for (unsigned i = 0; i < I->getNumOperands(); i++) { if (isa<UndefValue>(I->getOperand(i))) { // If the entire vector is undefined, just return this info. @@ -1184,19 +1184,19 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (IdxNo < VWidth) PreInsertDemandedElts.clearBit(IdxNo); - // If we only demand the element that is being inserted and that element - // was extracted from the same index in another vector with the same type, - // replace this insert with that other vector. - // Note: This is attempted before the call to simplifyAndSetOp because that - // may change UndefElts to a value that does not match with Vec. 
- Value *Vec; - if (PreInsertDemandedElts == 0 && - match(I->getOperand(1), - m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) && - Vec->getType() == I->getType()) { - return Vec; - } - + // If we only demand the element that is being inserted and that element + // was extracted from the same index in another vector with the same type, + // replace this insert with that other vector. + // Note: This is attempted before the call to simplifyAndSetOp because that + // may change UndefElts to a value that does not match with Vec. + Value *Vec; + if (PreInsertDemandedElts == 0 && + match(I->getOperand(1), + m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) && + Vec->getType() == I->getType()) { + return Vec; + } + simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts); // If this is inserting an element that isn't demanded, remove this @@ -1215,8 +1215,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, assert(Shuffle->getOperand(0)->getType() == Shuffle->getOperand(1)->getType() && "Expected shuffle operands to have same type"); - unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType()) - ->getNumElements(); + unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType()) + ->getNumElements(); // Handle trivial case of a splat. Only check the first element of LHS // operand. if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) && @@ -1317,8 +1317,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // this constant vector to single insertelement instruction. // shufflevector V, C, <v1, v2, .., ci, .., vm> -> // insertelement V, C[ci], ci-n - if (OpWidth == - cast<FixedVectorType>(Shuffle->getType())->getNumElements()) { + if (OpWidth == + cast<FixedVectorType>(Shuffle->getType())->getNumElements()) { Value *Op = nullptr; Constant *Value = nullptr; unsigned Idx = -1u; @@ -1405,7 +1405,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // Vector->vector casts only. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); if (!VTy) break; - unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements(); + unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements(); APInt InputDemandedElts(InVWidth, 0); UndefElts2 = APInt(InVWidth, 0); unsigned Ratio; @@ -1488,19 +1488,19 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (II->getIntrinsicID() == Intrinsic::masked_gather) simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2); simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3); - + // Output elements are undefined if the element from both sources are. // TODO: can strengthen via mask as well. 
UndefElts = UndefElts2 & UndefElts3; break; } default: { - // Handle target specific intrinsics - Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic( - *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, - simplifyAndSetOp); - if (V.hasValue()) - return V.getValue(); + // Handle target specific intrinsics + Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic( + *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + simplifyAndSetOp); + if (V.hasValue()) + return V.getValue(); break; } } // switch on IntrinsicID diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 06f22cdfb6..c1791dc17e 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/BasicBlock.h" @@ -36,7 +36,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include <cassert> #include <cstdint> #include <iterator> @@ -47,10 +47,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -STATISTIC(NumAggregateReconstructionsSimplified, - "Number of aggregate reconstructions turned into reuse of the " - "original aggregate"); - +STATISTIC(NumAggregateReconstructionsSimplified, + "Number of aggregate reconstructions turned into reuse of the " + "original aggregate"); + /// Return true if the value is cheaper to scalarize than it is to leave as a /// vector operation. IsConstantExtractIndex indicates whether we are extracting /// one known element from a vector constant. @@ -91,8 +91,8 @@ static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) { // If we have a PHI node with a vector type that is only used to feed // itself and be an operand of extractelement at a constant location, // try to replace the PHI of the vector type with a PHI of a scalar type. 
-Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, - PHINode *PN) { +Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, + PHINode *PN) { SmallVector<Instruction *, 2> Extracts; // The users we want the PHI to have are: // 1) The EI ExtractElement (we already know this) @@ -185,19 +185,19 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] auto *SrcTy = cast<VectorType>(X->getType()); Type *DestTy = Ext.getType(); - ElementCount NumSrcElts = SrcTy->getElementCount(); - ElementCount NumElts = - cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); + ElementCount NumSrcElts = SrcTy->getElementCount(); + ElementCount NumElts = + cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); if (NumSrcElts == NumElts) if (Value *Elt = findScalarElement(X, ExtIndexC)) return new BitCastInst(Elt, DestTy); - assert(NumSrcElts.isScalable() == NumElts.isScalable() && - "Src and Dst must be the same sort of vector type"); - + assert(NumSrcElts.isScalable() == NumElts.isScalable() && + "Src and Dst must be the same sort of vector type"); + // If the source elements are wider than the destination, try to shift and // truncate a subset of scalar bits of an insert op. - if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { + if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { Value *Scalar; uint64_t InsIndexC; if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar), @@ -208,8 +208,8 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, // into. Example: if we inserted element 1 of a <2 x i64> and we are // extracting an i16 (narrowing ratio = 4), then this extract must be from 1 // of elements 4-7 of the bitcasted vector. - unsigned NarrowingRatio = - NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); + unsigned NarrowingRatio = + NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); if (ExtIndexC / NarrowingRatio != InsIndexC) return nullptr; @@ -271,7 +271,7 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, /// Find elements of V demanded by UserInstr. static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { - unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); + unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); // Conservatively assume that all elements are needed. APInt UsedElts(APInt::getAllOnesValue(VWidth)); @@ -289,7 +289,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { case Instruction::ShuffleVector: { ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr); unsigned MaskNumElts = - cast<FixedVectorType>(UserInstr->getType())->getNumElements(); + cast<FixedVectorType>(UserInstr->getType())->getNumElements(); UsedElts = APInt(VWidth, 0); for (unsigned i = 0; i < MaskNumElts; i++) { @@ -315,7 +315,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { /// no user demands an element of V, then the corresponding bit /// remains unset in the returned value. 
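The shuffle case of findDemandedEltsBySingleUser simply collects the first-operand lanes named by the mask. A rough standalone sketch of that bookkeeping follows; the function name is illustrative, a plain integer bitmask replaces APInt, and it assumes LLVM's convention that mask values below the operand width select from the first operand while -1 marks an undef lane:

#include <cassert>
#include <cstdint>
#include <vector>

// Collect which lanes of the *first* shuffle operand (width VWidth) are read.
static uint32_t demandedOfFirstOperand(const std::vector<int> &Mask,
                                       unsigned VWidth) {
  uint32_t Used = 0;
  for (int M : Mask)
    if (M >= 0 && static_cast<unsigned>(M) < VWidth)
      Used |= 1u << static_cast<unsigned>(M);
  return Used;
}

int main() {
  // shufflevector %v, %w, <0, 5, 0, undef> reads only element 0 of %v.
  assert(demandedOfFirstOperand({0, 5, 0, -1}, 4) == 0x1u);
  // Mask <3, 2, 7, 6> reads elements 2 and 3 of %v.
  assert(demandedOfFirstOperand({3, 2, 7, 6}, 4) == 0xCu);
  return 0;
}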
static APInt findDemandedEltsByAllUsers(Value *V) { - unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); + unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); APInt UnionUsedElts(VWidth, 0); for (const Use &U : V->uses()) { @@ -333,7 +333,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) { return UnionUsedElts; } -Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { +Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { Value *SrcVec = EI.getVectorOperand(); Value *Index = EI.getIndexOperand(); if (Value *V = SimplifyExtractElementInst(SrcVec, Index, @@ -345,17 +345,17 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { auto *IndexC = dyn_cast<ConstantInt>(Index); if (IndexC) { ElementCount EC = EI.getVectorOperandType()->getElementCount(); - unsigned NumElts = EC.getKnownMinValue(); + unsigned NumElts = EC.getKnownMinValue(); // InstSimplify should handle cases where the index is invalid. // For fixed-length vector, it's invalid to extract out-of-range element. - if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) + if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) return nullptr; // This instruction only demands the single element from the input vector. // Skip for scalable type, the number of elements is unknown at // compile-time. - if (!EC.isScalable() && NumElts != 1) { + if (!EC.isScalable() && NumElts != 1) { // If the input vector has a single use, simplify it based on this use // property. if (SrcVec->hasOneUse()) { @@ -472,7 +472,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl<int> &Mask) { assert(LHS->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); - unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); + unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); if (isa<UndefValue>(V)) { Mask.assign(NumElts, -1); @@ -514,7 +514,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, unsigned ExtractedIdx = cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned NumLHSElts = - cast<FixedVectorType>(LHS->getType())->getNumElements(); + cast<FixedVectorType>(LHS->getType())->getNumElements(); // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { @@ -543,9 +543,9 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// shufflevector to replace one or more insert/extract pairs. 
static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, - InstCombinerImpl &IC) { - auto *InsVecType = cast<FixedVectorType>(InsElt->getType()); - auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType()); + InstCombinerImpl &IC) { + auto *InsVecType = cast<FixedVectorType>(InsElt->getType()); + auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType()); unsigned NumInsElts = InsVecType->getNumElements(); unsigned NumExtElts = ExtVecType->getNumElements(); @@ -626,7 +626,7 @@ using ShuffleOps = std::pair<Value *, Value *>; static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, Value *PermittedRHS, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); @@ -673,7 +673,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, } unsigned NumLHSElts = - cast<FixedVectorType>(RHS->getType())->getNumElements(); + cast<FixedVectorType>(RHS->getType())->getNumElements(); Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx; return std::make_pair(LR.first, RHS); } @@ -682,8 +682,8 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, // We've gone as far as we can: anything on the other side of the // extractelement will already have been converted into a shuffle. unsigned NumLHSElts = - cast<FixedVectorType>(EI->getOperand(0)->getType()) - ->getNumElements(); + cast<FixedVectorType>(EI->getOperand(0)->getType()) + ->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i); return std::make_pair(EI->getOperand(0), PermittedRHS); @@ -705,285 +705,285 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, return std::make_pair(V, nullptr); } -/// Look for chain of insertvalue's that fully define an aggregate, and trace -/// back the values inserted, see if they are all were extractvalue'd from -/// the same source aggregate from the exact same element indexes. -/// If they were, just reuse the source aggregate. -/// This potentially deals with PHI indirections. -Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( - InsertValueInst &OrigIVI) { - Type *AggTy = OrigIVI.getType(); - unsigned NumAggElts; - switch (AggTy->getTypeID()) { - case Type::StructTyID: - NumAggElts = AggTy->getStructNumElements(); - break; - case Type::ArrayTyID: - NumAggElts = AggTy->getArrayNumElements(); - break; - default: - llvm_unreachable("Unhandled aggregate type?"); - } - - // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able - // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}), - // FIXME: any interesting patterns to be caught with larger limit? - assert(NumAggElts > 0 && "Aggregate should have elements."); - if (NumAggElts > 2) - return nullptr; - - static constexpr auto NotFound = None; - static constexpr auto FoundMismatch = nullptr; - - // Try to find a value of each element of an aggregate. - // FIXME: deal with more complex, not one-dimensional, aggregate types - SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound); - - // Do we know values for each element of the aggregate? - auto KnowAllElts = [&AggElts]() { - return all_of(AggElts, - [](Optional<Value *> Elt) { return Elt != NotFound; }); - }; - - int Depth = 0; - - // Arbitrary `insertvalue` visitation depth limit. 
Let's be okay with - // every element being overwritten twice, which should never happen. - static const int DepthLimit = 2 * NumAggElts; - - // Recurse up the chain of `insertvalue` aggregate operands until either we've - // reconstructed full initializer or can't visit any more `insertvalue`'s. - for (InsertValueInst *CurrIVI = &OrigIVI; - Depth < DepthLimit && CurrIVI && !KnowAllElts(); - CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), - ++Depth) { - Value *InsertedValue = CurrIVI->getInsertedValueOperand(); - ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); - - // Don't bother with more than single-level aggregates. - if (Indices.size() != 1) - return nullptr; // FIXME: deal with more complex aggregates? - - // Now, we may have already previously recorded the value for this element - // of an aggregate. If we did, that means the CurrIVI will later be - // overwritten with the already-recorded value. But if not, let's record it! - Optional<Value *> &Elt = AggElts[Indices.front()]; - Elt = Elt.getValueOr(InsertedValue); - - // FIXME: should we handle chain-terminating undef base operand? - } - - // Was that sufficient to deduce the full initializer for the aggregate? - if (!KnowAllElts()) - return nullptr; // Give up then. - - // We now want to find the source[s] of the aggregate elements we've found. - // And with "source" we mean the original aggregate[s] from which - // the inserted elements were extracted. This may require PHI translation. - - enum class AggregateDescription { - /// When analyzing the value that was inserted into an aggregate, we did - /// not manage to find defining `extractvalue` instruction to analyze. - NotFound, - /// When analyzing the value that was inserted into an aggregate, we did - /// manage to find defining `extractvalue` instruction[s], and everything - /// matched perfectly - aggregate type, element insertion/extraction index. - Found, - /// When analyzing the value that was inserted into an aggregate, we did - /// manage to find defining `extractvalue` instruction, but there was - /// a mismatch: either the source type from which the extraction was didn't - /// match the aggregate type into which the insertion was, - /// or the extraction/insertion channels mismatched, - /// or different elements had different source aggregates. - FoundMismatch - }; - auto Describe = [](Optional<Value *> SourceAggregate) { - if (SourceAggregate == NotFound) - return AggregateDescription::NotFound; - if (*SourceAggregate == FoundMismatch) - return AggregateDescription::FoundMismatch; - return AggregateDescription::Found; - }; - - // Given the value \p Elt that was being inserted into element \p EltIdx of an - // aggregate AggTy, see if \p Elt was originally defined by an - // appropriate extractvalue (same element index, same aggregate type). - // If found, return the source aggregate from which the extraction was. - // If \p PredBB is provided, does PHI translation of an \p Elt first. - auto FindSourceAggregate = - [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, - Optional<BasicBlock *> PredBB) -> Optional<Value *> { - // For now(?), only deal with, at most, a single level of PHI indirection. - if (UseBB && PredBB) - Elt = Elt->DoPHITranslation(*UseBB, *PredBB); - // FIXME: deal with multiple levels of PHI indirection? - - // Did we find an extraction? 
- auto *EVI = dyn_cast<ExtractValueInst>(Elt); - if (!EVI) - return NotFound; - - Value *SourceAggregate = EVI->getAggregateOperand(); - - // Is the extraction from the same type into which the insertion was? - if (SourceAggregate->getType() != AggTy) - return FoundMismatch; - // And the element index doesn't change between extraction and insertion? - if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) - return FoundMismatch; - - return SourceAggregate; // AggregateDescription::Found - }; - - // Given elements AggElts that were constructing an aggregate OrigIVI, - // see if we can find appropriate source aggregate for each of the elements, - // and see it's the same aggregate for each element. If so, return it. - auto FindCommonSourceAggregate = - [&](Optional<BasicBlock *> UseBB, - Optional<BasicBlock *> PredBB) -> Optional<Value *> { - Optional<Value *> SourceAggregate; - - for (auto I : enumerate(AggElts)) { - assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch && - "We don't store nullptr in SourceAggregate!"); - assert((Describe(SourceAggregate) == AggregateDescription::Found) == - (I.index() != 0) && - "SourceAggregate should be valid after the the first element,"); - - // For this element, is there a plausible source aggregate? - // FIXME: we could special-case undef element, IFF we know that in the - // source aggregate said element isn't poison. - Optional<Value *> SourceAggregateForElement = - FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB); - - // Okay, what have we found? Does that correlate with previous findings? - - // Regardless of whether or not we have previously found source - // aggregate for previous elements (if any), if we didn't find one for - // this element, passthrough whatever we have just found. - if (Describe(SourceAggregateForElement) != AggregateDescription::Found) - return SourceAggregateForElement; - - // Okay, we have found source aggregate for this element. - // Let's see what we already know from previous elements, if any. - switch (Describe(SourceAggregate)) { - case AggregateDescription::NotFound: - // This is apparently the first element that we have examined. - SourceAggregate = SourceAggregateForElement; // Record the aggregate! - continue; // Great, now look at next element. - case AggregateDescription::Found: - // We have previously already successfully examined other elements. - // Is this the same source aggregate we've found for other elements? - if (*SourceAggregateForElement != *SourceAggregate) - return FoundMismatch; - continue; // Still the same aggregate, look at next element. - case AggregateDescription::FoundMismatch: - llvm_unreachable("Can't happen. We would have early-exited then."); - }; - } - - assert(Describe(SourceAggregate) == AggregateDescription::Found && - "Must be a valid Value"); - return *SourceAggregate; - }; - - Optional<Value *> SourceAggregate; - - // Can we find the source aggregate without looking at predecessors? - SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None); - if (Describe(SourceAggregate) != AggregateDescription::NotFound) { - if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) - return nullptr; // Conflicting source aggregates! - ++NumAggregateReconstructionsSimplified; - return replaceInstUsesWith(OrigIVI, *SourceAggregate); - } - - // Okay, apparently we need to look at predecessors. 
- - // We should be smart about picking the "use" basic block, which will be the - // merge point for aggregate, where we'll insert the final PHI that will be - // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice. - // We should look in which blocks each of the AggElts is being defined, - // they all should be defined in the same basic block. - BasicBlock *UseBB = nullptr; - - for (const Optional<Value *> &Elt : AggElts) { - // If this element's value was not defined by an instruction, ignore it. - auto *I = dyn_cast<Instruction>(*Elt); - if (!I) - continue; - // Otherwise, in which basic block is this instruction located? - BasicBlock *BB = I->getParent(); - // If it's the first instruction we've encountered, record the basic block. - if (!UseBB) { - UseBB = BB; - continue; - } - // Otherwise, this must be the same basic block we've seen previously. - if (UseBB != BB) - return nullptr; - } - - // If *all* of the elements are basic-block-independent, meaning they are - // either function arguments, or constant expressions, then if we didn't - // handle them without predecessor-aware handling, we won't handle them now. - if (!UseBB) - return nullptr; - - // If we didn't manage to find source aggregate without looking at - // predecessors, and there are no predecessors to look at, then we're done. - if (pred_empty(UseBB)) - return nullptr; - - // Arbitrary predecessor count limit. - static const int PredCountLimit = 64; - - // Cache the (non-uniqified!) list of predecessors in a vector, - // checking the limit at the same time for efficiency. - SmallVector<BasicBlock *, 4> Preds; // May have duplicates! - for (BasicBlock *Pred : predecessors(UseBB)) { - // Don't bother if there are too many predecessors. - if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? - return nullptr; - Preds.emplace_back(Pred); - } - - // For each predecessor, what is the source aggregate, - // from which all the elements were originally extracted from? - // Note that we want for the map to have stable iteration order! - SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates; - for (BasicBlock *Pred : Preds) { - std::pair<decltype(SourceAggregates)::iterator, bool> IV = - SourceAggregates.insert({Pred, nullptr}); - // Did we already evaluate this predecessor? - if (!IV.second) - continue; - - // Let's hope that when coming from predecessor Pred, all elements of the - // aggregate produced by OrigIVI must have been originally extracted from - // the same aggregate. Is that so? Can we find said original aggregate? - SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); - if (Describe(SourceAggregate) != AggregateDescription::Found) - return nullptr; // Give up. - IV.first->second = *SourceAggregate; - } - - // All good! Now we just need to thread the source aggregates here. - // Note that we have to insert the new PHI here, ourselves, because we can't - // rely on InstCombinerImpl::run() inserting it into the right basic block. - // Note that the same block can be a predecessor more than once, - // and we need to preserve that invariant for the PHI node. 
- BuilderTy::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(UseBB->getFirstNonPHI()); - auto *PHI = - Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); - for (BasicBlock *Pred : Preds) - PHI->addIncoming(SourceAggregates[Pred], Pred); - - ++NumAggregateReconstructionsSimplified; - return replaceInstUsesWith(OrigIVI, PHI); -} - +/// Look for chain of insertvalue's that fully define an aggregate, and trace +/// back the values inserted, see if they are all were extractvalue'd from +/// the same source aggregate from the exact same element indexes. +/// If they were, just reuse the source aggregate. +/// This potentially deals with PHI indirections. +Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( + InsertValueInst &OrigIVI) { + Type *AggTy = OrigIVI.getType(); + unsigned NumAggElts; + switch (AggTy->getTypeID()) { + case Type::StructTyID: + NumAggElts = AggTy->getStructNumElements(); + break; + case Type::ArrayTyID: + NumAggElts = AggTy->getArrayNumElements(); + break; + default: + llvm_unreachable("Unhandled aggregate type?"); + } + + // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able + // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}), + // FIXME: any interesting patterns to be caught with larger limit? + assert(NumAggElts > 0 && "Aggregate should have elements."); + if (NumAggElts > 2) + return nullptr; + + static constexpr auto NotFound = None; + static constexpr auto FoundMismatch = nullptr; + + // Try to find a value of each element of an aggregate. + // FIXME: deal with more complex, not one-dimensional, aggregate types + SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound); + + // Do we know values for each element of the aggregate? + auto KnowAllElts = [&AggElts]() { + return all_of(AggElts, + [](Optional<Value *> Elt) { return Elt != NotFound; }); + }; + + int Depth = 0; + + // Arbitrary `insertvalue` visitation depth limit. Let's be okay with + // every element being overwritten twice, which should never happen. + static const int DepthLimit = 2 * NumAggElts; + + // Recurse up the chain of `insertvalue` aggregate operands until either we've + // reconstructed full initializer or can't visit any more `insertvalue`'s. + for (InsertValueInst *CurrIVI = &OrigIVI; + Depth < DepthLimit && CurrIVI && !KnowAllElts(); + CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), + ++Depth) { + Value *InsertedValue = CurrIVI->getInsertedValueOperand(); + ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); + + // Don't bother with more than single-level aggregates. + if (Indices.size() != 1) + return nullptr; // FIXME: deal with more complex aggregates? + + // Now, we may have already previously recorded the value for this element + // of an aggregate. If we did, that means the CurrIVI will later be + // overwritten with the already-recorded value. But if not, let's record it! + Optional<Value *> &Elt = AggElts[Indices.front()]; + Elt = Elt.getValueOr(InsertedValue); + + // FIXME: should we handle chain-terminating undef base operand? + } + + // Was that sufficient to deduce the full initializer for the aggregate? + if (!KnowAllElts()) + return nullptr; // Give up then. + + // We now want to find the source[s] of the aggregate elements we've found. + // And with "source" we mean the original aggregate[s] from which + // the inserted elements were extracted. This may require PHI translation. 
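At the source level, the reconstruction-into-reuse fold reintroduced here is the observation that rebuilding an aggregate field-by-field from values extracted at the same indices of one source aggregate just yields that source aggregate. A loose C++ analogy (not the IR the pass actually inspects; the struct and function names are made up for illustration):

#include <cassert>

// Stand-in for a two-element aggregate such as {i8*, i32}.
struct Pair {
  int First;
  long Second;
};

// The shape the fold recognizes: every field re-inserted from the same source.
static Pair rebuild(const Pair &P) { return Pair{P.First, P.Second}; }

// The shape it rewrites to: simply reuse the source aggregate.
static Pair reuse(const Pair &P) { return P; }

int main() {
  Pair P{42, -7L};
  assert(rebuild(P).First == reuse(P).First);
  assert(rebuild(P).Second == reuse(P).Second);
  return 0;
}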
+ + enum class AggregateDescription { + /// When analyzing the value that was inserted into an aggregate, we did + /// not manage to find defining `extractvalue` instruction to analyze. + NotFound, + /// When analyzing the value that was inserted into an aggregate, we did + /// manage to find defining `extractvalue` instruction[s], and everything + /// matched perfectly - aggregate type, element insertion/extraction index. + Found, + /// When analyzing the value that was inserted into an aggregate, we did + /// manage to find defining `extractvalue` instruction, but there was + /// a mismatch: either the source type from which the extraction was didn't + /// match the aggregate type into which the insertion was, + /// or the extraction/insertion channels mismatched, + /// or different elements had different source aggregates. + FoundMismatch + }; + auto Describe = [](Optional<Value *> SourceAggregate) { + if (SourceAggregate == NotFound) + return AggregateDescription::NotFound; + if (*SourceAggregate == FoundMismatch) + return AggregateDescription::FoundMismatch; + return AggregateDescription::Found; + }; + + // Given the value \p Elt that was being inserted into element \p EltIdx of an + // aggregate AggTy, see if \p Elt was originally defined by an + // appropriate extractvalue (same element index, same aggregate type). + // If found, return the source aggregate from which the extraction was. + // If \p PredBB is provided, does PHI translation of an \p Elt first. + auto FindSourceAggregate = + [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, + Optional<BasicBlock *> PredBB) -> Optional<Value *> { + // For now(?), only deal with, at most, a single level of PHI indirection. + if (UseBB && PredBB) + Elt = Elt->DoPHITranslation(*UseBB, *PredBB); + // FIXME: deal with multiple levels of PHI indirection? + + // Did we find an extraction? + auto *EVI = dyn_cast<ExtractValueInst>(Elt); + if (!EVI) + return NotFound; + + Value *SourceAggregate = EVI->getAggregateOperand(); + + // Is the extraction from the same type into which the insertion was? + if (SourceAggregate->getType() != AggTy) + return FoundMismatch; + // And the element index doesn't change between extraction and insertion? + if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) + return FoundMismatch; + + return SourceAggregate; // AggregateDescription::Found + }; + + // Given elements AggElts that were constructing an aggregate OrigIVI, + // see if we can find appropriate source aggregate for each of the elements, + // and see it's the same aggregate for each element. If so, return it. + auto FindCommonSourceAggregate = + [&](Optional<BasicBlock *> UseBB, + Optional<BasicBlock *> PredBB) -> Optional<Value *> { + Optional<Value *> SourceAggregate; + + for (auto I : enumerate(AggElts)) { + assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch && + "We don't store nullptr in SourceAggregate!"); + assert((Describe(SourceAggregate) == AggregateDescription::Found) == + (I.index() != 0) && + "SourceAggregate should be valid after the the first element,"); + + // For this element, is there a plausible source aggregate? + // FIXME: we could special-case undef element, IFF we know that in the + // source aggregate said element isn't poison. + Optional<Value *> SourceAggregateForElement = + FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB); + + // Okay, what have we found? Does that correlate with previous findings? 
+ + // Regardless of whether or not we have previously found source + // aggregate for previous elements (if any), if we didn't find one for + // this element, passthrough whatever we have just found. + if (Describe(SourceAggregateForElement) != AggregateDescription::Found) + return SourceAggregateForElement; + + // Okay, we have found source aggregate for this element. + // Let's see what we already know from previous elements, if any. + switch (Describe(SourceAggregate)) { + case AggregateDescription::NotFound: + // This is apparently the first element that we have examined. + SourceAggregate = SourceAggregateForElement; // Record the aggregate! + continue; // Great, now look at next element. + case AggregateDescription::Found: + // We have previously already successfully examined other elements. + // Is this the same source aggregate we've found for other elements? + if (*SourceAggregateForElement != *SourceAggregate) + return FoundMismatch; + continue; // Still the same aggregate, look at next element. + case AggregateDescription::FoundMismatch: + llvm_unreachable("Can't happen. We would have early-exited then."); + }; + } + + assert(Describe(SourceAggregate) == AggregateDescription::Found && + "Must be a valid Value"); + return *SourceAggregate; + }; + + Optional<Value *> SourceAggregate; + + // Can we find the source aggregate without looking at predecessors? + SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None); + if (Describe(SourceAggregate) != AggregateDescription::NotFound) { + if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) + return nullptr; // Conflicting source aggregates! + ++NumAggregateReconstructionsSimplified; + return replaceInstUsesWith(OrigIVI, *SourceAggregate); + } + + // Okay, apparently we need to look at predecessors. + + // We should be smart about picking the "use" basic block, which will be the + // merge point for aggregate, where we'll insert the final PHI that will be + // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice. + // We should look in which blocks each of the AggElts is being defined, + // they all should be defined in the same basic block. + BasicBlock *UseBB = nullptr; + + for (const Optional<Value *> &Elt : AggElts) { + // If this element's value was not defined by an instruction, ignore it. + auto *I = dyn_cast<Instruction>(*Elt); + if (!I) + continue; + // Otherwise, in which basic block is this instruction located? + BasicBlock *BB = I->getParent(); + // If it's the first instruction we've encountered, record the basic block. + if (!UseBB) { + UseBB = BB; + continue; + } + // Otherwise, this must be the same basic block we've seen previously. + if (UseBB != BB) + return nullptr; + } + + // If *all* of the elements are basic-block-independent, meaning they are + // either function arguments, or constant expressions, then if we didn't + // handle them without predecessor-aware handling, we won't handle them now. + if (!UseBB) + return nullptr; + + // If we didn't manage to find source aggregate without looking at + // predecessors, and there are no predecessors to look at, then we're done. + if (pred_empty(UseBB)) + return nullptr; + + // Arbitrary predecessor count limit. + static const int PredCountLimit = 64; + + // Cache the (non-uniqified!) list of predecessors in a vector, + // checking the limit at the same time for efficiency. + SmallVector<BasicBlock *, 4> Preds; // May have duplicates! 
+ for (BasicBlock *Pred : predecessors(UseBB)) { + // Don't bother if there are too many predecessors. + if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? + return nullptr; + Preds.emplace_back(Pred); + } + + // For each predecessor, what is the source aggregate, + // from which all the elements were originally extracted from? + // Note that we want for the map to have stable iteration order! + SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates; + for (BasicBlock *Pred : Preds) { + std::pair<decltype(SourceAggregates)::iterator, bool> IV = + SourceAggregates.insert({Pred, nullptr}); + // Did we already evaluate this predecessor? + if (!IV.second) + continue; + + // Let's hope that when coming from predecessor Pred, all elements of the + // aggregate produced by OrigIVI must have been originally extracted from + // the same aggregate. Is that so? Can we find said original aggregate? + SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); + if (Describe(SourceAggregate) != AggregateDescription::Found) + return nullptr; // Give up. + IV.first->second = *SourceAggregate; + } + + // All good! Now we just need to thread the source aggregates here. + // Note that we have to insert the new PHI here, ourselves, because we can't + // rely on InstCombinerImpl::run() inserting it into the right basic block. + // Note that the same block can be a predecessor more than once, + // and we need to preserve that invariant for the PHI node. + BuilderTy::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(UseBB->getFirstNonPHI()); + auto *PHI = + Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); + for (BasicBlock *Pred : Preds) + PHI->addIncoming(SourceAggregates[Pred], Pred); + + ++NumAggregateReconstructionsSimplified; + return replaceInstUsesWith(OrigIVI, PHI); +} + /// Try to find redundant insertvalue instructions, like the following ones: /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 @@ -991,7 +991,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( /// first one, making the first one redundant. /// It should be transformed to: /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 -Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { +Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { bool IsRedundant = false; ArrayRef<unsigned int> FirstIndices = I.getIndices(); @@ -1016,10 +1016,10 @@ Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { if (IsRedundant) return replaceInstUsesWith(I, I.getOperand(0)); - - if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I)) - return NewI; - + + if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I)) + return NewI; + return nullptr; } @@ -1150,8 +1150,8 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { // For example: // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef> - unsigned NumMaskElts = - cast<FixedVectorType>(Shuf->getType())->getNumElements(); + unsigned NumMaskElts = + cast<FixedVectorType>(Shuf->getType())->getNumElements(); SmallVector<int, 16> NewMask(NumMaskElts); for (unsigned i = 0; i != NumMaskElts; ++i) NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i); @@ -1189,8 +1189,8 @@ static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { // that same index value. 
// For example: // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' - unsigned NumMaskElts = - cast<FixedVectorType>(Shuf->getType())->getNumElements(); + unsigned NumMaskElts = + cast<FixedVectorType>(Shuf->getType())->getNumElements(); SmallVector<int, 16> NewMask(NumMaskElts); ArrayRef<int> OldMask = Shuf->getShuffleMask(); for (unsigned i = 0; i != NumMaskElts; ++i) { @@ -1339,7 +1339,7 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { return nullptr; } -Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { +Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); @@ -1487,7 +1487,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, // Propagating an undefined shuffle mask element to integer div/rem is not // allowed because those opcodes can create immediate undefined behavior // from an undefined element in an operand. - if (llvm::is_contained(Mask, -1)) + if (llvm::is_contained(Mask, -1)) return false; LLVM_FALLTHROUGH; case Instruction::Add: @@ -1520,7 +1520,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, // longer vector ops, but that may result in more expensive codegen. Type *ITy = I->getType(); if (ITy->isVectorTy() && - Mask.size() > cast<FixedVectorType>(ITy)->getNumElements()) + Mask.size() > cast<FixedVectorType>(ITy)->getNumElements()) return false; for (Value *Operand : I->operands()) { if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) @@ -1678,8 +1678,8 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { case Instruction::GetElementPtr: { SmallVector<Value*, 8> NewOps; bool NeedsRebuild = - (Mask.size() != - cast<FixedVectorType>(I->getType())->getNumElements()); + (Mask.size() != + cast<FixedVectorType>(I->getType())->getNumElements()); for (int i = 0, e = I->getNumOperands(); i != e; ++i) { Value *V; // Recursively call evaluateInDifferentElementOrder on vector arguments @@ -1734,7 +1734,7 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef<int> Mask) { unsigned LHSElems = - cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements(); + cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements(); unsigned MaskElems = Mask.size(); unsigned BegIdx = Mask.front(); unsigned EndIdx = Mask.back(); @@ -1824,7 +1824,7 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { is_contained(Mask, UndefMaskElem) && (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); if (MightCreatePoisonOrUB) - NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true); + NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true); // shuf (bop X, C), X, M --> bop X, C' // shuf X, (bop X, C), M --> bop X, C' @@ -1866,8 +1866,8 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, // For example: // shuf (inselt undef, X, 2), undef, <2,2,undef> // --> shuf (inselt undef, X, 0), undef, <0,0,undef> - unsigned NumMaskElts = - cast<FixedVectorType>(Shuf.getType())->getNumElements(); + unsigned NumMaskElts = + cast<FixedVectorType>(Shuf.getType())->getNumElements(); SmallVector<int, 16> NewMask(NumMaskElts, 0); for (unsigned i = 0; i != NumMaskElts; ++i) if (Mask[i] == UndefMaskElem) @@ -1885,7 +1885,7 @@ static Instruction 
*foldSelectShuffle(ShuffleVectorInst &Shuf, // Canonicalize to choose from operand 0 first unless operand 1 is undefined. // Commuting undef to operand 0 conflicts with another canonicalization. - unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); + unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); if (!isa<UndefValue>(Shuf.getOperand(1)) && Shuf.getMaskValue(0) >= (int)NumElts) { // TODO: Can we assert that both operands of a shuffle-select are not undef @@ -1952,8 +1952,8 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, is_contained(Mask, UndefMaskElem) && (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); if (MightCreatePoisonOrUB) - NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC, - ConstantsAreOp1); + NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC, + ConstantsAreOp1); Value *V; if (X == Y) { @@ -2020,8 +2020,8 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, // and the source element type must be larger than the shuffle element type. Type *SrcType = X->getType(); if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() || - cast<FixedVectorType>(SrcType)->getNumElements() != - cast<FixedVectorType>(DestType)->getNumElements() || + cast<FixedVectorType>(SrcType)->getNumElements() != + cast<FixedVectorType>(DestType)->getNumElements() || SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0) return nullptr; @@ -2037,7 +2037,7 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, if (Mask[i] == UndefMaskElem) continue; uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio; - assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits"); + assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits"); if (Mask[i] != (int)LSBIndex) return nullptr; } @@ -2064,19 +2064,19 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, // We need a narrow condition value. It must be extended with undef elements // and have the same number of elements as this shuffle. - unsigned NarrowNumElts = - cast<FixedVectorType>(Shuf.getType())->getNumElements(); + unsigned NarrowNumElts = + cast<FixedVectorType>(Shuf.getType())->getNumElements(); Value *NarrowCond; if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) || - cast<FixedVectorType>(NarrowCond->getType())->getNumElements() != + cast<FixedVectorType>(NarrowCond->getType())->getNumElements() != NarrowNumElts || !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding()) return nullptr; // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) --> // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask) - Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask()); - Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask()); + Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask()); + Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask()); return SelectInst::Create(NarrowCond, NarrowX, NarrowY); } @@ -2107,7 +2107,7 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { // new shuffle mask. Otherwise, copy the original mask element. 
Example: // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> --> // shuf X, Y, <C0, undef, C2, undef> - unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); + unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); SmallVector<int, 16> NewMask(NumElts); assert(NumElts < Mask.size() && "Identity with extract must have less elements than its inputs"); @@ -2123,7 +2123,7 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { /// Try to replace a shuffle with an insertelement or try to replace a shuffle /// operand with the operand of an insertelement. static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, - InstCombinerImpl &IC) { + InstCombinerImpl &IC) { Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1); SmallVector<int, 16> Mask; Shuf.getShuffleMask(Mask); @@ -2132,7 +2132,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, // TODO: This restriction could be removed if the insert has only one use // (because the transform would require a new length-changing shuffle). int NumElts = Mask.size(); - if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements())) + if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements())) return nullptr; // This is a specialization of a fold in SimplifyDemandedVectorElts. We may @@ -2144,7 +2144,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, uint64_t IdxC; if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask - if (!is_contained(Mask, (int)IdxC)) + if (!is_contained(Mask, (int)IdxC)) return IC.replaceOperand(Shuf, 0, X); } if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { @@ -2152,7 +2152,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, // accesses to the 2nd vector input of the shuffle. 
IdxC += NumElts; // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask - if (!is_contained(Mask, (int)IdxC)) + if (!is_contained(Mask, (int)IdxC)) return IC.replaceOperand(Shuf, 1, X); } @@ -2227,10 +2227,10 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { Value *X = Shuffle0->getOperand(0); Value *Y = Shuffle1->getOperand(0); if (X->getType() != Y->getType() || - !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) || - !isPowerOf2_32( - cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || - !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || + !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) || + !isPowerOf2_32( + cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || + !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || isa<UndefValue>(X) || isa<UndefValue>(Y)) return nullptr; assert(isa<UndefValue>(Shuffle0->getOperand(1)) && @@ -2241,8 +2241,8 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { // operands directly by adjusting the shuffle mask to account for the narrower // types: // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' - int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements(); - int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements(); + int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements(); + int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements(); assert(WideElts > NarrowElts && "Unexpected types for identity with padding"); ArrayRef<int> Mask = Shuf.getShuffleMask(); @@ -2275,7 +2275,7 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { return new ShuffleVectorInst(X, Y, NewMask); } -Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { +Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI); @@ -2283,13 +2283,13 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); - // Bail out for scalable vectors - if (isa<ScalableVectorType>(LHS->getType())) - return nullptr; - + // Bail out for scalable vectors + if (isa<ScalableVectorType>(LHS->getType())) + return nullptr; + // shuffle x, x, mask --> shuffle x, undef, mask' - unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); - unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); + unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); + unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); ArrayRef<int> Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); @@ -2303,7 +2303,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && X->getType()->isVectorTy() && VWidth == LHSWidth) { // Try to create a scaled mask constant. 
- auto *XType = cast<FixedVectorType>(X->getType()); + auto *XType = cast<FixedVectorType>(X->getType()); unsigned XNumElts = XType->getNumElements(); SmallVector<int, 16> ScaledMask; if (XNumElts >= VWidth) { @@ -2411,7 +2411,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isShuffleExtractingFromLHS(SVI, Mask)) { Value *V = LHS; unsigned MaskElems = Mask.size(); - auto *SrcTy = cast<FixedVectorType>(V->getType()); + auto *SrcTy = cast<FixedVectorType>(V->getType()); unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize(); unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); assert(SrcElemBitWidth && "vector elements must have a bitwidth"); @@ -2443,7 +2443,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector<int, 16> ShuffleMask(SrcNumElems, -1); for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) ShuffleMask[I] = Idx; - V = Builder.CreateShuffleVector(V, ShuffleMask, + V = Builder.CreateShuffleVector(V, ShuffleMask, SVI.getName() + ".extract"); BegIdx = 0; } @@ -2528,11 +2528,11 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (LHSShuffle) { LHSOp0 = LHSShuffle->getOperand(0); LHSOp1 = LHSShuffle->getOperand(1); - LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements(); + LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements(); } if (RHSShuffle) { RHSOp0 = RHSShuffle->getOperand(0); - RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements(); + RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements(); } Value* newLHS = LHS; Value* newRHS = RHS; @@ -2637,7 +2637,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { if (!newRHS) newRHS = UndefValue::get(newLHS->getType()); - return new ShuffleVectorInst(newLHS, newRHS, newMask); + return new ShuffleVectorInst(newLHS, newRHS, newMask); } return MadeChange ? &SVI : nullptr; diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstructionCombining.cpp index 828fd49524..5e2c9a3e54 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -59,7 +59,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/BasicBlock.h" @@ -114,9 +114,9 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "instcombine" -STATISTIC(NumWorklistIterations, - "Number of instruction combining iterations performed"); - +STATISTIC(NumWorklistIterations, + "Number of instruction combining iterations performed"); + STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); @@ -127,13 +127,13 @@ STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); -// FIXME: these limits eventually should be as low as 2. +// FIXME: these limits eventually should be as low as 2. 
static constexpr unsigned InstCombineDefaultMaxIterations = 1000; -#ifndef NDEBUG -static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; -#else +#ifndef NDEBUG +static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; +#else static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000; -#endif +#endif static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), @@ -164,41 +164,41 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024), static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true)); -Optional<Instruction *> -InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { - // Handle target specific intrinsics - if (II.getCalledFunction()->isTargetIntrinsic()) { - return TTI.instCombineIntrinsic(*this, II); - } - return None; -} - -Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( - IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, - bool &KnownBitsComputed) { - // Handle target specific intrinsics - if (II.getCalledFunction()->isTargetIntrinsic()) { - return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known, - KnownBitsComputed); - } - return None; -} - -Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( - IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, - APInt &UndefElts3, - std::function<void(Instruction *, unsigned, APInt, APInt &)> - SimplifyAndSetOp) { - // Handle target specific intrinsics - if (II.getCalledFunction()->isTargetIntrinsic()) { - return TTI.simplifyDemandedVectorEltsIntrinsic( - *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, - SimplifyAndSetOp); - } - return None; -} - -Value *InstCombinerImpl::EmitGEPOffset(User *GEP) { +Optional<Instruction *> +InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { + // Handle target specific intrinsics + if (II.getCalledFunction()->isTargetIntrinsic()) { + return TTI.instCombineIntrinsic(*this, II); + } + return None; +} + +Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( + IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) { + // Handle target specific intrinsics + if (II.getCalledFunction()->isTargetIntrinsic()) { + return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known, + KnownBitsComputed); + } + return None; +} + +Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( + IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, + APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) { + // Handle target specific intrinsics + if (II.getCalledFunction()->isTargetIntrinsic()) { + return TTI.simplifyDemandedVectorEltsIntrinsic( + *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + SimplifyAndSetOp); + } + return None; +} + +Value *InstCombinerImpl::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(&Builder, DL, GEP); } @@ -211,8 +211,8 @@ Value *InstCombinerImpl::EmitGEPOffset(User *GEP) { /// legal to convert to, in order to open up more combining opportunities. /// NOTE: this treats i8, i16 and i32 specially, due to them being so common /// from frontend languages. 
-bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, - unsigned ToWidth) const { +bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, + unsigned ToWidth) const { bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth); bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth); @@ -239,7 +239,7 @@ bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, /// to a larger illegal type. i1 is always treated as a legal type because it is /// a fundamental type in IR, and there are many specialized optimizations for /// i1 types. -bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { +bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { // TODO: This could be extended to allow vectors. Datalayout changes might be // needed to properly support that. if (!From->isIntegerTy() || !To->isIntegerTy()) @@ -307,8 +307,8 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { /// cast to eliminate one of the associative operations: /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) -static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, - InstCombinerImpl &IC) { +static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, + InstCombinerImpl &IC) { auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0)); if (!Cast || !Cast->hasOneUse()) return false; @@ -366,7 +366,7 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" /// if C1 and C2 are constants. -bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { +bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); bool Changed = false; @@ -594,10 +594,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, - Instruction::BinaryOps InnerOpcode, - Value *A, Value *B, Value *C, - Value *D) { +Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, + Value *A, Value *B, Value *C, + Value *D) { assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; @@ -700,7 +700,7 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). /// Returns the simplified value, or null if it didn't simplify. -Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { +Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); @@ -743,10 +743,10 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' - // Disable the use of undef because it's not safe to distribute undef. 
- auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); - Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive); - Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive); + // Disable the use of undef because it's not safe to distribute undef. + auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); + Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive); + Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive); // Do "A op C" and "B op C" both simplify? if (L && R) { @@ -782,10 +782,10 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' - // Disable the use of undef because it's not safe to distribute undef. - auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); - Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive); - Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive); + // Disable the use of undef because it's not safe to distribute undef. + auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); + Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive); + Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive); // Do "A op B" and "A op C" both simplify? if (L && R) { @@ -818,9 +818,9 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) { return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); } -Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, - Value *LHS, - Value *RHS) { +Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, + Value *LHS, + Value *RHS) { Value *A, *B, *C, *D, *E, *F; bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))); bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F))); @@ -870,33 +870,33 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, return SI; } -/// Freely adapt every user of V as-if V was changed to !V. -/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done. -void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) { - for (User *U : I->users()) { - switch (cast<Instruction>(U)->getOpcode()) { - case Instruction::Select: { - auto *SI = cast<SelectInst>(U); - SI->swapValues(); - SI->swapProfMetadata(); - break; - } - case Instruction::Br: - cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too - break; - case Instruction::Xor: - replaceInstUsesWith(cast<Instruction>(*U), I); - break; - default: - llvm_unreachable("Got unexpected user - out of sync with " - "canFreelyInvertAllUsersOf() ?"); - } - } -} - +/// Freely adapt every user of V as-if V was changed to !V. +/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done. +void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) { + for (User *U : I->users()) { + switch (cast<Instruction>(U)->getOpcode()) { + case Instruction::Select: { + auto *SI = cast<SelectInst>(U); + SI->swapValues(); + SI->swapProfMetadata(); + break; + } + case Instruction::Br: + cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too + break; + case Instruction::Xor: + replaceInstUsesWith(cast<Instruction>(*U), I); + break; + default: + llvm_unreachable("Got unexpected user - out of sync with " + "canFreelyInvertAllUsersOf() ?"); + } + } +} + /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a /// constant zero (which is the 'negate' form). 
-Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { +Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { Value *NegV; if (match(V, m_Neg(m_Value(NegV)))) return NegV; @@ -957,8 +957,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, return RI; } -Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, - SelectInst *SI) { +Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, + SelectInst *SI) { // Don't modify shared select instructions. if (!SI->hasOneUse()) return nullptr; @@ -983,7 +983,7 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, return nullptr; // If vectors, verify that they have the same number of elements. - if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount()) + if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount()) return nullptr; } @@ -1053,7 +1053,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, return RI; } -Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { +Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) return nullptr; @@ -1079,9 +1079,9 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { BasicBlock *NonConstBB = nullptr; for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InVal = PN->getIncomingValue(i); - // If I is a freeze instruction, count undef as a non-constant. - if (match(InVal, m_ImmConstant()) && - (!isa<FreezeInst>(I) || isGuaranteedNotToBeUndefOrPoison(InVal))) + // If I is a freeze instruction, count undef as a non-constant. + if (match(InVal, m_ImmConstant()) && + (!isa<FreezeInst>(I) || isGuaranteedNotToBeUndefOrPoison(InVal))) continue; if (isa<PHINode>(InVal)) return nullptr; // Itself a phi. @@ -1106,11 +1106,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // operation in that block. However, if this is a critical edge, we would be // inserting the computation on some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - // Also, make sure that the pred block is not dead code. + // Also, make sure that the pred block is not dead code. if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB)) - return nullptr; + if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB)) + return nullptr; } // Okay, we can do the transformation: create the new PHI node. @@ -1142,7 +1142,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // FalseVInPred versus TrueVInPred. When we have individual nonzero // elements in the vector, we will incorrectly fold InC to // `TrueVInPred`. - if (InC && isa<ConstantInt>(InC)) + if (InC && isa<ConstantInt>(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; else { // Generate the select in the same block as PN's current incoming block. 
@@ -1176,15 +1176,15 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - } else if (isa<FreezeInst>(&I)) { - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV; - if (NonConstBB == PN->getIncomingBlock(i)) - InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr"); - else - InV = PN->getIncomingValue(i); - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); - } + } else if (isa<FreezeInst>(&I)) { + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV; + if (NonConstBB == PN->getIncomingBlock(i)) + InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr"); + else + InV = PN->getIncomingValue(i); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } } else { CastInst *CI = cast<CastInst>(&I); Type *RetTy = CI->getType(); @@ -1199,8 +1199,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { } } - for (User *U : make_early_inc_range(PN->users())) { - Instruction *User = cast<Instruction>(U); + for (User *U : make_early_inc_range(PN->users())) { + Instruction *User = cast<Instruction>(U); if (User == &I) continue; replaceInstUsesWith(*User, NewPN); eraseInstFromFunction(*User); @@ -1208,7 +1208,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { return replaceInstUsesWith(I, NewPN); } -Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { +Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { if (!isa<Constant>(I.getOperand(1))) return nullptr; @@ -1226,9 +1226,9 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { /// is a sequence of GEP indices into the pointed type that will land us at the /// specified offset. If so, fill them into NewIndices and return the resultant /// element type, otherwise return null. -Type * -InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, - SmallVectorImpl<Value *> &NewIndices) { +Type * +InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, + SmallVectorImpl<Value *> &NewIndices) { Type *Ty = PtrTy->getElementType(); if (!Ty->isSized()) return nullptr; @@ -1297,7 +1297,7 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { /// Return a value X such that Val = X * Scale, or null if none. /// If the multiplication is known not to overflow, then NoSignedWrap is set. -Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { +Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!"); assert(cast<IntegerType>(Val->getType())->getBitWidth() == Scale.getBitWidth() && "Scale not compatible with value!"); @@ -1537,8 +1537,8 @@ Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } while (true); } -Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { - if (!isa<VectorType>(Inst.getType())) +Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { + if (!isa<VectorType>(Inst.getType())) return nullptr; BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); @@ -1628,14 +1628,14 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { // other binops, so they can be folded. It may also enable demanded elements // transforms. 
Constant *C; - auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType()); - if (InstVTy && - match(&Inst, + auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType()); + if (InstVTy && + match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))), - m_ImmConstant(C))) && - cast<FixedVectorType>(V1->getType())->getNumElements() <= - InstVTy->getNumElements()) { - assert(InstVTy->getScalarType() == V1->getType()->getScalarType() && + m_ImmConstant(C))) && + cast<FixedVectorType>(V1->getType())->getNumElements() <= + InstVTy->getNumElements()) { + assert(InstVTy->getScalarType() == V1->getType()->getScalarType() && "Shuffle should not change scalar type"); // Find constant NewC that has property: @@ -1650,7 +1650,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType()); SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar); bool MayChange = true; - unsigned NumElts = InstVTy->getNumElements(); + unsigned NumElts = InstVTy->getNumElements(); for (unsigned I = 0; I < NumElts; ++I) { Constant *CElt = C->getAggregateElement(I); if (ShMask[I] >= 0) { @@ -1740,7 +1740,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp Value *NewBO = Builder.CreateBinOp(Opcode, X, Y); SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex); - Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask); + Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask); Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp); // Intersect FMF on both new binops. Other (poison-generating) flags are @@ -1760,7 +1760,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { /// Try to narrow the width of a binop if at least 1 operand is an extend of /// of a value. This requires a potentially expensive known bits check to make /// sure the narrow op does not overflow. -Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { +Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { // We need at least one extended operand. Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1); @@ -1840,7 +1840,7 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC' // Propagate 'inbounds' and metadata from existing instructions. // Note: using IRBuilder to create the constants for efficiency. - SmallVector<Value *, 4> IndexC(GEP.indices()); + SmallVector<Value *, 4> IndexC(GEP.indices()); bool IsInBounds = GEP.isInBounds(); Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(TrueC, IndexC) : Builder.CreateGEP(TrueC, IndexC); @@ -1849,8 +1849,8 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel); } -Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { - SmallVector<Value *, 8> Ops(GEP.operands()); +Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { + SmallVector<Value *, 8> Ops(GEP.operands()); Type *GEPType = GEP.getType(); Type *GEPEltType = GEP.getSourceElementType(); bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType); @@ -2220,7 +2220,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? 
if (CATy->getElementType() == StrippedPtrEltTy) { // -> GEP i8* X, ... - SmallVector<Value *, 8> Idx(drop_begin(GEP.indices())); + SmallVector<Value *, 8> Idx(drop_begin(GEP.indices())); GetElementPtrInst *Res = GetElementPtrInst::Create( StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); @@ -2256,7 +2256,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // -> // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* - SmallVector<Value *, 8> Idx(GEP.indices()); + SmallVector<Value *, 8> Idx(GEP.indices()); Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, @@ -2398,15 +2398,15 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy, const DataLayout &DL) { - auto *VecVTy = cast<FixedVectorType>(VecTy); + auto *VecVTy = cast<FixedVectorType>(VecTy); return ArrTy->getArrayElementType() == VecVTy->getElementType() && ArrTy->getArrayNumElements() == VecVTy->getNumElements() && DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy); }; if (GEP.getNumOperands() == 3 && - ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) && + ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) && areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) || - (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() && + (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() && areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) { // Create a new GEP here, as using `setOperand()` followed by @@ -2601,7 +2601,7 @@ static bool isAllocSiteRemovable(Instruction *AI, return true; } -Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { +Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons to // null and free calls, delete the calls and replace the comparisons with true // or false as appropriate. @@ -2616,10 +2616,10 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { // If we are removing an alloca with a dbg.declare, insert dbg.value calls // before each store. - SmallVector<DbgVariableIntrinsic *, 8> DVIs; + SmallVector<DbgVariableIntrinsic *, 8> DVIs; std::unique_ptr<DIBuilder> DIB; if (isa<AllocaInst>(MI)) { - findDbgUsers(DVIs, &MI); + findDbgUsers(DVIs, &MI); DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); } @@ -2653,9 +2653,9 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { ConstantInt::get(Type::getInt1Ty(C->getContext()), C->isFalseWhenEqual())); } else if (auto *SI = dyn_cast<StoreInst>(I)) { - for (auto *DVI : DVIs) - if (DVI->isAddressOfVariable()) - ConvertDebugDeclareToDebugValue(DVI, SI, *DIB); + for (auto *DVI : DVIs) + if (DVI->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DVI, SI, *DIB); } else { // Casts, GEP, or anything else: we're about to delete this instruction, // so it can not have any valid uses. @@ -2672,31 +2672,31 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { None, "", II->getParent()); } - // Remove debug intrinsics which describe the value contained within the - // alloca. 
In addition to removing dbg.{declare,addr} which simply point to - // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: - // - // ``` - // define void @foo(i32 %0) { - // %a = alloca i32 ; Deleted. - // store i32 %0, i32* %a - // dbg.value(i32 %0, "arg0") ; Not deleted. - // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. - // call void @trivially_inlinable_no_op(i32* %a) - // ret void - // } - // ``` - // - // This may not be required if we stop describing the contents of allocas - // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in - // the LowerDbgDeclare utility. - // - // If there is a dead store to `%a` in @trivially_inlinable_no_op, the - // "arg0" dbg.value may be stale after the call. However, failing to remove - // the DW_OP_deref dbg.value causes large gaps in location coverage. - for (auto *DVI : DVIs) - if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) - DVI->eraseFromParent(); + // Remove debug intrinsics which describe the value contained within the + // alloca. In addition to removing dbg.{declare,addr} which simply point to + // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: + // + // ``` + // define void @foo(i32 %0) { + // %a = alloca i32 ; Deleted. + // store i32 %0, i32* %a + // dbg.value(i32 %0, "arg0") ; Not deleted. + // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. + // call void @trivially_inlinable_no_op(i32* %a) + // ret void + // } + // ``` + // + // This may not be required if we stop describing the contents of allocas + // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in + // the LowerDbgDeclare utility. + // + // If there is a dead store to `%a` in @trivially_inlinable_no_op, the + // "arg0" dbg.value may be stale after the call. However, failing to remove + // the DW_OP_deref dbg.value causes large gaps in location coverage. + for (auto *DVI : DVIs) + if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) + DVI->eraseFromParent(); return eraseInstFromFunction(MI); } @@ -2784,7 +2784,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, return &FI; } -Instruction *InstCombinerImpl::visitFree(CallInst &FI) { +Instruction *InstCombinerImpl::visitFree(CallInst &FI) { Value *Op = FI.getArgOperand(0); // free undef -> unreachable. @@ -2825,7 +2825,7 @@ static bool isMustTailCall(Value *V) { return false; } -Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { +Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { if (RI.getNumOperands() == 0) // ret void return nullptr; @@ -2848,31 +2848,31 @@ Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { return nullptr; } -Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { - // Try to remove the previous instruction if it must lead to unreachable. - // This includes instructions like stores and "llvm.assume" that may not get - // removed by simple dead code elimination. - Instruction *Prev = I.getPrevNonDebugInstruction(); - if (Prev && !Prev->isEHPad() && - isGuaranteedToTransferExecutionToSuccessor(Prev)) { - // Temporarily disable removal of volatile stores preceding unreachable, - // pending a potential LangRef change permitting volatile stores to trap. - // TODO: Either remove this code, or properly integrate the check into - // isGuaranteedToTransferExecutionToSuccessor(). 
- if (auto *SI = dyn_cast<StoreInst>(Prev)) - if (SI->isVolatile()) - return nullptr; - - // A value may still have uses before we process it here (for example, in - // another unreachable block), so convert those to undef. - replaceInstUsesWith(*Prev, UndefValue::get(Prev->getType())); - eraseInstFromFunction(*Prev); - return &I; - } - return nullptr; -} - -Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { +Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { + // Try to remove the previous instruction if it must lead to unreachable. + // This includes instructions like stores and "llvm.assume" that may not get + // removed by simple dead code elimination. + Instruction *Prev = I.getPrevNonDebugInstruction(); + if (Prev && !Prev->isEHPad() && + isGuaranteedToTransferExecutionToSuccessor(Prev)) { + // Temporarily disable removal of volatile stores preceding unreachable, + // pending a potential LangRef change permitting volatile stores to trap. + // TODO: Either remove this code, or properly integrate the check into + // isGuaranteedToTransferExecutionToSuccessor(). + if (auto *SI = dyn_cast<StoreInst>(Prev)) + if (SI->isVolatile()) + return nullptr; + + // A value may still have uses before we process it here (for example, in + // another unreachable block), so convert those to undef. + replaceInstUsesWith(*Prev, UndefValue::get(Prev->getType())); + eraseInstFromFunction(*Prev); + return &I; + } + return nullptr; +} + +Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { assert(BI.isUnconditional() && "Only for unconditional branches."); // If this store is the second-to-last instruction in the basic block @@ -2901,7 +2901,7 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { return nullptr; } -Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { +Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { if (BI.isUnconditional()) return visitUnconditionalBranchInst(BI); @@ -2937,7 +2937,7 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { return nullptr; } -Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { +Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { Value *Cond = SI.getCondition(); Value *Op0; ConstantInt *AddRHS; @@ -2968,7 +2968,7 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); // Shrink the condition operand if the new type is smaller than the old type. - // But do not shrink to a non-standard type, because backend can't generate + // But do not shrink to a non-standard type, because backend can't generate // good code for that yet. // TODO: We can make it aggressive again after fixing PR39569. 
  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
@@ -2987,7 +2987,7 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
  return nullptr;
}

-Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
+Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
  Value *Agg = EV.getAggregateOperand();

  if (!EV.hasIndices())
@@ -3132,11 +3132,11 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
  case EHPersonality::GNU_CXX_SjLj:
  case EHPersonality::GNU_ObjC:
  case EHPersonality::MSVC_X86SEH:
-  case EHPersonality::MSVC_TableSEH:
+  case EHPersonality::MSVC_TableSEH:
  case EHPersonality::MSVC_CXX:
  case EHPersonality::CoreCLR:
  case EHPersonality::Wasm_CXX:
-  case EHPersonality::XL_CXX:
+  case EHPersonality::XL_CXX:
    return TypeInfo->isNullValue();
  }
  llvm_unreachable("invalid enum");
@@ -3149,7 +3149,7 @@ static bool shorter_filter(const Value *LHS, const Value *RHS) {
    cast<ArrayType>(RHS->getType())->getNumElements();
}

-Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
+Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
  // The logic here should be correct for any real-world personality function.
  // However if that turns out not to be true, the offending logic can always
  // be conditioned on the personality function, like the catch-all logic is.
@@ -3458,46 +3458,46 @@ Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
  return nullptr;
}

-Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
+Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
  Value *Op0 = I.getOperand(0);

  if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

-  // freeze (phi const, x) --> phi const, (freeze x)
-  if (auto *PN = dyn_cast<PHINode>(Op0)) {
-    if (Instruction *NV = foldOpIntoPhi(I, PN))
-      return NV;
-  }
-
-  if (match(Op0, m_Undef())) {
-    // If I is freeze(undef), see its uses and fold it to the best constant.
-    // - or: pick -1
-    // - select's condition: pick the value that leads to choosing a constant
-    // - other ops: pick 0
-    Constant *BestValue = nullptr;
-    Constant *NullValue = Constant::getNullValue(I.getType());
-    for (const auto *U : I.users()) {
-      Constant *C = NullValue;
-
-      if (match(U, m_Or(m_Value(), m_Value())))
-        C = Constant::getAllOnesValue(I.getType());
-      else if (const auto *SI = dyn_cast<SelectInst>(U)) {
-        if (SI->getCondition() == &I) {
-          APInt CondVal(1, isa<Constant>(SI->getFalseValue()) ? 0 : 1);
-          C = Constant::getIntegerValue(I.getType(), CondVal);
-        }
-      }
-
-      if (!BestValue)
-        BestValue = C;
-      else if (BestValue != C)
-        BestValue = NullValue;
-    }
-
-    return replaceInstUsesWith(I, BestValue);
-  }
-
+  // freeze (phi const, x) --> phi const, (freeze x)
+  if (auto *PN = dyn_cast<PHINode>(Op0)) {
+    if (Instruction *NV = foldOpIntoPhi(I, PN))
+      return NV;
+  }
+
+  if (match(Op0, m_Undef())) {
+    // If I is freeze(undef), see its uses and fold it to the best constant.
+    // - or: pick -1
+    // - select's condition: pick the value that leads to choosing a constant
+    // - other ops: pick 0
+    Constant *BestValue = nullptr;
+    Constant *NullValue = Constant::getNullValue(I.getType());
+    for (const auto *U : I.users()) {
+      Constant *C = NullValue;
+
+      if (match(U, m_Or(m_Value(), m_Value())))
+        C = Constant::getAllOnesValue(I.getType());
+      else if (const auto *SI = dyn_cast<SelectInst>(U)) {
+        if (SI->getCondition() == &I) {
+          APInt CondVal(1, isa<Constant>(SI->getFalseValue()) ? 0 : 1);
+          C = Constant::getIntegerValue(I.getType(), CondVal);
+        }
+      }
+
+      if (!BestValue)
+        BestValue = C;
+      else if (BestValue != C)
+        BestValue = NullValue;
+    }
+
+    return replaceInstUsesWith(I, BestValue);
+  }
+
  return nullptr;
}
@@ -3603,7 +3603,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  return true;
}

-bool InstCombinerImpl::run() {
+bool InstCombinerImpl::run() {
  while (!Worklist.isEmpty()) {
    // Walk deferred instructions in reverse order, and push them to the
    // worklist, which means they'll end up popped from the worklist in-order.
@@ -3665,9 +3665,9 @@ bool InstCombinerImpl::run() {
      else
        UserParent = UserInst->getParent();

-      // Try sinking to another block. If that block is unreachable, then do
-      // not bother. SimplifyCFG should handle it.
-      if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
+      // Try sinking to another block. If that block is unreachable, then do
+      // not bother. SimplifyCFG should handle it.
+      if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
        // See if the user is one of our successors that has only one
        // predecessor, so that we don't have to split the critical edge.
        bool ShouldSink = UserParent->getUniquePredecessor() == BB;
@@ -3701,8 +3701,8 @@ bool InstCombinerImpl::run() {

    // Now that we have an instruction, try combining it to simplify it.
    Builder.SetInsertPoint(I);
-    Builder.CollectMetadataToCopy(
-        I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
+    Builder.CollectMetadataToCopy(
+        I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});

#ifndef NDEBUG
    std::string OrigI;
@@ -3717,8 +3717,8 @@ bool InstCombinerImpl::run() {
      LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
                        << "    New = " << *Result << '\n');

-      Result->copyMetadata(*I,
-                           {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
+      Result->copyMetadata(*I,
+                           {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
      // Everything uses the new instruction now.
      I->replaceAllUsesWith(Result);

@@ -3729,14 +3729,14 @@ bool InstCombinerImpl::run() {
      BasicBlock *InstParent = I->getParent();
      BasicBlock::iterator InsertPos = I->getIterator();

-      // Are we replace a PHI with something that isn't a PHI, or vice versa?
-      if (isa<PHINode>(Result) != isa<PHINode>(I)) {
-        // We need to fix up the insertion point.
-        if (isa<PHINode>(I)) // PHI -> Non-PHI
-          InsertPos = InstParent->getFirstInsertionPt();
-        else                 // Non-PHI -> PHI
-          InsertPos = InstParent->getFirstNonPHI()->getIterator();
-      }
+      // Are we replace a PHI with something that isn't a PHI, or vice versa?
+      if (isa<PHINode>(Result) != isa<PHINode>(I)) {
+        // We need to fix up the insertion point.
+        if (isa<PHINode>(I)) // PHI -> Non-PHI
+          InsertPos = InstParent->getFirstInsertionPt();
+        else                 // Non-PHI -> PHI
+          InsertPos = InstParent->getFirstNonPHI()->getIterator();
+      }

      InstParent->getInstList().insert(InsertPos, Result);

@@ -3766,55 +3766,55 @@ bool InstCombinerImpl::run() {
  return MadeIRChange;
}

-// Track the scopes used by !alias.scope and !noalias. In a function, a
-// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
-// by both sets. If not, the declaration of the scope can be safely omitted.
-// The MDNode of the scope can be omitted as well for the instructions that are
-// part of this function. We do not do that at this point, as this might become
-// too time consuming to do.
-class AliasScopeTracker {
-  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
-  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
-
-public:
-  void analyse(Instruction *I) {
-    // This seems to be faster than checking 'mayReadOrWriteMemory()'.
-    if (!I->hasMetadataOtherThanDebugLoc())
-      return;
-
-    auto Track = [](Metadata *ScopeList, auto &Container) {
-      const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
-      if (!MDScopeList || !Container.insert(MDScopeList).second)
-        return;
-      for (auto &MDOperand : MDScopeList->operands())
-        if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
-          Container.insert(MDScope);
-    };
-
-    Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
-    Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
-  }
-
-  bool isNoAliasScopeDeclDead(Instruction *Inst) {
-    NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
-    if (!Decl)
-      return false;
-
-    assert(Decl->use_empty() &&
-           "llvm.experimental.noalias.scope.decl in use ?");
-    const MDNode *MDSL = Decl->getScopeList();
-    assert(MDSL->getNumOperands() == 1 &&
-           "llvm.experimental.noalias.scope should refer to a single scope");
-    auto &MDOperand = MDSL->getOperand(0);
-    if (auto *MD = dyn_cast<MDNode>(MDOperand))
-      return !UsedAliasScopesAndLists.contains(MD) ||
-             !UsedNoAliasScopesAndLists.contains(MD);
-
-    // Not an MDNode ? throw away.
-    return true;
-  }
-};
-
+// Track the scopes used by !alias.scope and !noalias. In a function, a
+// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
+// by both sets. If not, the declaration of the scope can be safely omitted.
+// The MDNode of the scope can be omitted as well for the instructions that are
+// part of this function. We do not do that at this point, as this might become
+// too time consuming to do.
+class AliasScopeTracker {
+  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
+  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
+
+public:
+  void analyse(Instruction *I) {
+    // This seems to be faster than checking 'mayReadOrWriteMemory()'.
+    if (!I->hasMetadataOtherThanDebugLoc())
+      return;
+
+    auto Track = [](Metadata *ScopeList, auto &Container) {
+      const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
+      if (!MDScopeList || !Container.insert(MDScopeList).second)
+        return;
+      for (auto &MDOperand : MDScopeList->operands())
+        if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
+          Container.insert(MDScope);
+    };
+
+    Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
+    Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
+  }
+
+  bool isNoAliasScopeDeclDead(Instruction *Inst) {
+    NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
+    if (!Decl)
+      return false;
+
+    assert(Decl->use_empty() &&
+           "llvm.experimental.noalias.scope.decl in use ?");
+    const MDNode *MDSL = Decl->getScopeList();
+    assert(MDSL->getNumOperands() == 1 &&
+           "llvm.experimental.noalias.scope should refer to a single scope");
+    auto &MDOperand = MDSL->getOperand(0);
+    if (auto *MD = dyn_cast<MDNode>(MDOperand))
+      return !UsedAliasScopesAndLists.contains(MD) ||
+             !UsedNoAliasScopesAndLists.contains(MD);
+
+    // Not an MDNode ? throw away.
+    return true;
+  }
+};
+
/// Populate the IC worklist from a function, by walking it in depth-first
/// order and adding all reachable code to the worklist.
///
@@ -3833,7 +3833,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
  SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
  DenseMap<Constant *, Constant *> FoldedConstants;
-  AliasScopeTracker SeenAliasScopes;
+  AliasScopeTracker SeenAliasScopes;

  do {
    BasicBlock *BB = Worklist.pop_back_val();
@@ -3878,13 +3878,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
        }
      }

-      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
-      // these call instructions consumes non-trivial amount of time and
-      // provides no value for the optimization.
-      if (!Inst->isDebugOrPseudoInst()) {
+      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
+      // these call instructions consumes non-trivial amount of time and
+      // provides no value for the optimization.
+      if (!Inst->isDebugOrPseudoInst()) {
        InstrsForInstCombineWorklist.push_back(Inst);
-        SeenAliasScopes.analyse(Inst);
-      }
+        SeenAliasScopes.analyse(Inst);
+      }
    }

    // Recursively visit successors. If this is a branch or switch on a
@@ -3904,7 +3904,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
      }
    }

-    append_range(Worklist, successors(TI));
+    append_range(Worklist, successors(TI));
  } while (!Worklist.empty());

  // Remove instructions inside unreachable blocks. This prevents the
@@ -3914,12 +3914,12 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
    if (Visited.count(&BB))
      continue;

-    unsigned NumDeadInstInBB;
-    unsigned NumDeadDbgInstInBB;
-    std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
-        removeAllNonTerminatorAndEHPadInstructions(&BB);
-
-    MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
+    unsigned NumDeadInstInBB;
+    unsigned NumDeadDbgInstInBB;
+    std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
+        removeAllNonTerminatorAndEHPadInstructions(&BB);
+
+    MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
    NumDeadInst += NumDeadInstInBB;
  }
@@ -3932,8 +3932,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
  for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
    // DCE instruction if trivially dead. As we iterate in reverse program
    // order here, we will clean up whole chains of dead instructions.
-    if (isInstructionTriviallyDead(Inst, TLI) ||
-        SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
+    if (isInstructionTriviallyDead(Inst, TLI) ||
+        SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
      ++NumDeadInst;
      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
      salvageDebugInfo(*Inst);
@@ -3950,8 +3950,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,

static bool combineInstructionsOverFunction(
    Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
-    AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
-    DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+    AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
+    DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
    ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
  auto &DL = F.getParent()->getDataLayout();
  MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
@@ -3975,7 +3975,7 @@ static bool combineInstructionsOverFunction(
  // Iterate while there is work to do.
  unsigned Iteration = 0;
  while (true) {
-    ++NumWorklistIterations;
+    ++NumWorklistIterations;
    ++Iteration;

    if (Iteration > InfiniteLoopDetectionThreshold) {
@@ -3996,8 +3996,8 @@ static bool combineInstructionsOverFunction(

    MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);

-    InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
-                        ORE, BFI, PSI, DL, LI);
+    InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
+                        ORE, BFI, PSI, DL, LI);
    IC.MaxArraySizeForCombine = MaxArraySize;

    if (!IC.run())
@@ -4020,7 +4020,7 @@ PreservedAnalyses InstCombinePass::run(Function &F,
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);

  auto *LI = AM.getCachedResult<LoopAnalysis>(F);

@@ -4031,8 +4031,8 @@ PreservedAnalyses InstCombinePass::run(Function &F,
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
      &AM.getResult<BlockFrequencyAnalysis>(F) :
      nullptr;
-  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
-                                       BFI, PSI, MaxIterations, LI))
+  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
+                                       BFI, PSI, MaxIterations, LI))
    // No changes, all analyses are preserved.
    return PreservedAnalyses::all();

@@ -4050,7 +4050,7 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
-  AU.addRequired<TargetTransformInfoWrapperPass>();
+  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
@@ -4069,7 +4069,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

@@ -4083,8 +4083,8 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
          &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
          nullptr;

-  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
-                                         BFI, PSI, MaxIterations, LI);
+  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
+                                         BFI, PSI, MaxIterations, LI);
}

char InstructionCombiningPass::ID = 0;

@@ -4103,7 +4103,7 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make b/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make
index 3f74e68d16..ac05d9b16a 100644
--- a/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make
+++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make
@@ -12,12 +12,12 @@ LICENSE(Apache-2.0 WITH LLVM-exception)

LICENSE_TEXTS(.yandex_meta/licenses.list.txt)

PEERDIR(
-    contrib/libs/llvm12
-    contrib/libs/llvm12/include
-    contrib/libs/llvm12/lib/Analysis
-    contrib/libs/llvm12/lib/IR
-    contrib/libs/llvm12/lib/Support
-    contrib/libs/llvm12/lib/Transforms/Utils
+    contrib/libs/llvm12
+    contrib/libs/llvm12/include
+    contrib/libs/llvm12/lib/Analysis
+    contrib/libs/llvm12/lib/IR
+    contrib/libs/llvm12/lib/Support
+    contrib/libs/llvm12/lib/Transforms/Utils
)

ADDINCL(