aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/InstCombine
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine')
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp1730
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp2774
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp3049
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp1944
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp4715
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h582
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp1421
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp1543
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp1002
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp1270
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp831
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp1333
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp1274
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp3223
14 files changed, 26691 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
new file mode 100644
index 000000000000..221a22007173
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -0,0 +1,1730 @@
+//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for add, fadd, sub, and fsub.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+namespace {
+
+ /// Class representing coefficient of floating-point addend.
+ /// This class needs to be highly efficient, which is especially true for
+ /// the constructor. As of I write this comment, the cost of the default
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// perform write-merging).
+ ///
+ class FAddendCoef {
+ public:
+ // The constructor has to initialize a APFloat, which is unnecessary for
+ // most addends which have coefficient either 1 or -1. So, the constructor
+ // is expensive. In order to avoid the cost of the constructor, we should
+ // reuse some instances whenever possible. The pre-created instances
+ // FAddCombine::Add[0-5] embodies this idea.
+ //
+ FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
+ ~FAddendCoef();
+
+ void set(short C) {
+ assert(!insaneIntVal(C) && "Insane coefficient");
+ IsFp = false; IntVal = C;
+ }
+
+ void set(const APFloat& C);
+
+ void negate();
+
+ bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
+ Value *getValue(Type *) const;
+
+ // If possible, don't define operator+/operator- etc because these
+ // operators inevitably call FAddendCoef's constructor which is not cheap.
+ void operator=(const FAddendCoef &A);
+ void operator+=(const FAddendCoef &A);
+ void operator*=(const FAddendCoef &S);
+
+ bool isOne() const { return isInt() && IntVal == 1; }
+ bool isTwo() const { return isInt() && IntVal == 2; }
+ bool isMinusOne() const { return isInt() && IntVal == -1; }
+ bool isMinusTwo() const { return isInt() && IntVal == -2; }
+
+ private:
+ bool insaneIntVal(int V) { return V > 4 || V < -4; }
+ APFloat *getFpValPtr()
+ { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+ const APFloat *getFpValPtr() const
+ { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
+
+ const APFloat &getFpVal() const {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
+ APFloat &getFpVal() {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
+ bool isInt() const { return !IsFp; }
+
+ // If the coefficient is represented by an integer, promote it to a
+ // floating point.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+ // from an *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
+
+ private:
+ bool IsFp;
+
+ // True iff FpValBuf contains an instance of APFloat.
+ bool BufHasFpVal;
+
+ // The integer coefficient of an individual addend is either 1 or -1,
+ // and we try to simplify at most 4 addends from neighboring at most
+ // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
+ // is overkill of this end.
+ short IntVal;
+
+ AlignedCharArrayUnion<APFloat> FpValBuf;
+ };
+
+ /// FAddend is used to represent floating-point addend. An addend is
+ /// represented as <C, V>, where the V is a symbolic value, and C is a
+ /// constant coefficient. A constant addend is represented as <C, 0>.
+ ///
+ class FAddend {
+ public:
+ FAddend() : Val(nullptr) {}
+
+ Value *getSymVal() const { return Val; }
+ const FAddendCoef &getCoef() const { return Coeff; }
+
+ bool isConstant() const { return Val == nullptr; }
+ bool isZero() const { return Coeff.isZero(); }
+
+ void set(short Coefficient, Value *V) {
+ Coeff.set(Coefficient);
+ Val = V;
+ }
+ void set(const APFloat &Coefficient, Value *V) {
+ Coeff.set(Coefficient);
+ Val = V;
+ }
+ void set(const ConstantFP *Coefficient, Value *V) {
+ Coeff.set(Coefficient->getValueAPF());
+ Val = V;
+ }
+
+ void negate() { Coeff.negate(); }
+
+ /// Drill down the U-D chain one step to find the definition of V, and
+ /// try to break the definition into one or two addends.
+ static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
+
+ /// Similar to FAddend::drillDownOneStep() except that the value being
+ /// splitted is the addend itself.
+ unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
+
+ void operator+=(const FAddend &T) {
+ assert((Val == T.Val) && "Symbolic-values disagree");
+ Coeff += T.Coeff;
+ }
+
+ private:
+ void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
+
+ // This addend has the value of "Coeff * Val".
+ Value *Val;
+ FAddendCoef Coeff;
+ };
+
+ /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
+ /// with its neighboring at most two instructions.
+ ///
+ class FAddCombine {
+ public:
+ FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {}
+ Value *simplify(Instruction *FAdd);
+
+ private:
+ typedef SmallVector<const FAddend*, 4> AddendVect;
+
+ Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
+
+ Value *performFactorization(Instruction *I);
+
+ /// Convert given addend to a Value
+ Value *createAddendVal(const FAddend &A, bool& NeedNeg);
+
+ /// Return the number of instructions needed to emit the N-ary addition.
+ unsigned calcInstrNumber(const AddendVect& Vect);
+ Value *createFSub(Value *Opnd0, Value *Opnd1);
+ Value *createFAdd(Value *Opnd0, Value *Opnd1);
+ Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
+ Value *createFNeg(Value *V);
+ Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
+ void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
+
+ InstCombiner::BuilderTy *Builder;
+ Instruction *Instr;
+
+ // Debugging stuff are clustered here.
+ #ifndef NDEBUG
+ unsigned CreateInstrNum;
+ void initCreateInstNum() { CreateInstrNum = 0; }
+ void incCreateInstNum() { CreateInstrNum++; }
+ #else
+ void initCreateInstNum() {}
+ void incCreateInstNum() {}
+ #endif
+ };
+
+} // anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of
+// {FAddendCoef, FAddend, FAddition, FAddCombine}.
+//
+//===----------------------------------------------------------------------===//
+FAddendCoef::~FAddendCoef() {
+ if (BufHasFpVal)
+ getFpValPtr()->~APFloat();
+}
+
+void FAddendCoef::set(const APFloat& C) {
+ APFloat *P = getFpValPtr();
+
+ if (isInt()) {
+ // As the buffer is meanless byte stream, we cannot call
+ // APFloat::operator=().
+ new(P) APFloat(C);
+ } else
+ *P = C;
+
+ IsFp = BufHasFpVal = true;
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
+ if (That.isInt())
+ set(That.IntVal);
+ else
+ set(That.getFpVal());
+}
+
+void FAddendCoef::operator+=(const FAddendCoef &That) {
+ enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+ if (isInt() == That.isInt()) {
+ if (isInt())
+ IntVal += That.IntVal;
+ else
+ getFpVal().add(That.getFpVal(), RndMode);
+ return;
+ }
+
+ if (isInt()) {
+ const APFloat &T = That.getFpVal();
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
+ return;
+ }
+
+ APFloat &T = getFpVal();
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
+}
+
+void FAddendCoef::operator*=(const FAddendCoef &That) {
+ if (That.isOne())
+ return;
+
+ if (That.isMinusOne()) {
+ negate();
+ return;
+ }
+
+ if (isInt() && That.isInt()) {
+ int Res = IntVal * (int)That.IntVal;
+ assert(!insaneIntVal(Res) && "Insane int value");
+ IntVal = Res;
+ return;
+ }
+
+ const fltSemantics &Semantic =
+ isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
+
+ if (isInt())
+ convertToFpType(Semantic);
+ APFloat &F0 = getFpVal();
+
+ if (That.isInt())
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
+ else
+ F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
+}
+
+void FAddendCoef::negate() {
+ if (isInt())
+ IntVal = 0 - IntVal;
+ else
+ getFpVal().changeSign();
+}
+
+Value *FAddendCoef::getValue(Type *Ty) const {
+ return isInt() ?
+ ConstantFP::get(Ty, float(IntVal)) :
+ ConstantFP::get(Ty->getContext(), getFpVal());
+}
+
+// The definition of <Val> Addends
+// =========================================
+// A + B <1, A>, <1,B>
+// A - B <1, A>, <1,B>
+// 0 - B <-1, B>
+// C * A, <C, A>
+// A + C <1, A> <C, NULL>
+// 0 +/- 0 <0, NULL> (corner case)
+//
+// Legend: A and B are not constant, C is constant
+//
+unsigned FAddend::drillValueDownOneStep
+ (Value *Val, FAddend &Addend0, FAddend &Addend1) {
+ Instruction *I = nullptr;
+ if (!Val || !(I = dyn_cast<Instruction>(Val)))
+ return 0;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
+ ConstantFP *C0, *C1;
+ Value *Opnd0 = I->getOperand(0);
+ Value *Opnd1 = I->getOperand(1);
+ if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
+ Opnd0 = nullptr;
+
+ if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
+ Opnd1 = nullptr;
+
+ if (Opnd0) {
+ if (!C0)
+ Addend0.set(1, Opnd0);
+ else
+ Addend0.set(C0, nullptr);
+ }
+
+ if (Opnd1) {
+ FAddend &Addend = Opnd0 ? Addend1 : Addend0;
+ if (!C1)
+ Addend.set(1, Opnd1);
+ else
+ Addend.set(C1, nullptr);
+ if (Opcode == Instruction::FSub)
+ Addend.negate();
+ }
+
+ if (Opnd0 || Opnd1)
+ return Opnd0 && Opnd1 ? 2 : 1;
+
+ // Both operands are zero. Weird!
+ Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
+ return 1;
+ }
+
+ if (I->getOpcode() == Instruction::FMul) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
+ Addend0.set(C, V1);
+ return 1;
+ }
+
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
+ Addend0.set(C, V0);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+// Try to break *this* addend into two addends. e.g. Suppose this addend is
+// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
+// i.e. <2.3, X> and <2.3, Y>.
+//
+unsigned FAddend::drillAddendDownOneStep
+ (FAddend &Addend0, FAddend &Addend1) const {
+ if (isConstant())
+ return 0;
+
+ unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
+ if (!BreakNum || Coeff.isOne())
+ return BreakNum;
+
+ Addend0.Scale(Coeff);
+
+ if (BreakNum == 2)
+ Addend1.Scale(Coeff);
+
+ return BreakNum;
+}
+
+// Try to perform following optimization on the input instruction I. Return the
+// simplified expression if was successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return nullptr;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return nullptr;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = nullptr;
+ Value *AddSub0 = nullptr, *AddSub1 = nullptr;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return nullptr;
+
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+ if (I0) Flags &= I->getFastMathFlags();
+ if (I1) Flags &= I->getFastMathFlags();
+
+ // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal())
+ return nullptr;
+ } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
+ II->setFastMathFlags(Flags);
+
+ if (isMpy) {
+ Value *RI = createFMul(Factor, NewAddSub);
+ if (Instruction *II = dyn_cast<Instruction>(RI))
+ II->setFastMathFlags(Flags);
+ return RI;
+ }
+
+ Value *RI = createFDiv(NewAddSub, Factor);
+ if (Instruction *II = dyn_cast<Instruction>(RI))
+ II->setFastMathFlags(Flags);
+ return RI;
+}
+
+Value *FAddCombine::simplify(Instruction *I) {
+ assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
+
+ // Currently we are not able to handle vector type.
+ if (I->getType()->isVectorTy())
+ return nullptr;
+
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ // Save the instruction before calling other member-functions.
+ Instr = I;
+
+ FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
+
+ unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
+
+ // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
+ unsigned Opnd0_ExpNum = 0;
+ unsigned Opnd1_ExpNum = 0;
+
+ if (!Opnd0.isConstant())
+ Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
+
+ // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
+ if (OpndNum == 2 && !Opnd1.isConstant())
+ Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
+
+ // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
+ if (Opnd0_ExpNum && Opnd1_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd0_0);
+ AllOpnds.push_back(&Opnd1_0);
+ if (Opnd0_ExpNum == 2)
+ AllOpnds.push_back(&Opnd0_1);
+ if (Opnd1_ExpNum == 2)
+ AllOpnds.push_back(&Opnd1_1);
+
+ // Compute instruction quota. We should save at least one instruction.
+ unsigned InstQuota = 0;
+
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
+
+ if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
+ return R;
+ }
+
+ if (OpndNum != 2) {
+ // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
+ // splitted into two addends, say "V = X - Y", the instruction would have
+ // been optimized into "I = Y - X" in the previous steps.
+ //
+ const FAddendCoef &CE = Opnd0.getCoef();
+ return CE.isOne() ? Opnd0.getSymVal() : nullptr;
+ }
+
+ // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
+ if (Opnd1_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd0);
+ AllOpnds.push_back(&Opnd1_0);
+ if (Opnd1_ExpNum == 2)
+ AllOpnds.push_back(&Opnd1_1);
+
+ if (Value *R = simplifyFAdd(AllOpnds, 1))
+ return R;
+ }
+
+ // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
+ if (Opnd0_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd1);
+ AllOpnds.push_back(&Opnd0_0);
+ if (Opnd0_ExpNum == 2)
+ AllOpnds.push_back(&Opnd0_1);
+
+ if (Value *R = simplifyFAdd(AllOpnds, 1))
+ return R;
+ }
+
+ // step 6: Try factorization as the last resort,
+ return performFactorization(I);
+}
+
+Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
+ unsigned AddendNum = Addends.size();
+ assert(AddendNum <= 4 && "Too many addends");
+
+ // For saving intermediate results;
+ unsigned NextTmpIdx = 0;
+ FAddend TmpResult[3];
+
+ // Points to the constant addend of the resulting simplified expression.
+ // If the resulting expr has constant-addend, this constant-addend is
+ // desirable to reside at the top of the resulting expression tree. Placing
+ // constant close to supper-expr(s) will potentially reveal some optimization
+ // opportunities in super-expr(s).
+ //
+ const FAddend *ConstAdd = nullptr;
+
+ // Simplified addends are placed <SimpVect>.
+ AddendVect SimpVect;
+
+ // The outer loop works on one symbolic-value at a time. Suppose the input
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // The symbolic-values will be processed in this order: x, y, z.
+ //
+ for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
+
+ const FAddend *ThisAddend = Addends[SymIdx];
+ if (!ThisAddend) {
+ // This addend was processed before.
+ continue;
+ }
+
+ Value *Val = ThisAddend->getSymVal();
+ unsigned StartIdx = SimpVect.size();
+ SimpVect.push_back(ThisAddend);
+
+ // The inner loop collects addends sharing same symbolic-value, and these
+ // addends will be later on folded into a single addend. Following above
+ // example, if the symbolic value "y" is being processed, the inner loop
+ // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will
+ // be later on folded into "<b1+b2, y>".
+ //
+ for (unsigned SameSymIdx = SymIdx + 1;
+ SameSymIdx < AddendNum; SameSymIdx++) {
+ const FAddend *T = Addends[SameSymIdx];
+ if (T && T->getSymVal() == Val) {
+ // Set null such that next iteration of the outer loop will not process
+ // this addend again.
+ Addends[SameSymIdx] = nullptr;
+ SimpVect.push_back(T);
+ }
+ }
+
+ // If multiple addends share same symbolic value, fold them together.
+ if (StartIdx + 1 != SimpVect.size()) {
+ FAddend &R = TmpResult[NextTmpIdx ++];
+ R = *SimpVect[StartIdx];
+ for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
+ R += *SimpVect[Idx];
+
+ // Pop all addends being folded and push the resulting folded addend.
+ SimpVect.resize(StartIdx);
+ if (Val) {
+ if (!R.isZero()) {
+ SimpVect.push_back(&R);
+ }
+ } else {
+ // Don't push constant addend at this time. It will be the last element
+ // of <SimpVect>.
+ ConstAdd = &R;
+ }
+ }
+ }
+
+ assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&
+ "out-of-bound access");
+
+ if (ConstAdd)
+ SimpVect.push_back(ConstAdd);
+
+ Value *Result;
+ if (!SimpVect.empty())
+ Result = createNaryFAdd(SimpVect, InstrQuota);
+ else {
+ // The addition is folded to 0.0.
+ Result = ConstantFP::get(Instr->getType(), 0.0);
+ }
+
+ return Result;
+}
+
+Value *FAddCombine::createNaryFAdd
+ (const AddendVect &Opnds, unsigned InstrQuota) {
+ assert(!Opnds.empty() && "Expect at least one addend");
+
+ // Step 1: Check if the # of instructions needed exceeds the quota.
+ //
+ unsigned InstrNeeded = calcInstrNumber(Opnds);
+ if (InstrNeeded > InstrQuota)
+ return nullptr;
+
+ initCreateInstNum();
+
+ // step 2: Emit the N-ary addition.
+ // Note that at most three instructions are involved in Fadd-InstCombine: the
+ // addition in question, and at most two neighboring instructions.
+ // The resulting optimized addition should have at least one less instruction
+ // than the original addition expression tree. This implies that the resulting
+ // N-ary addition has at most two instructions, and we don't need to worry
+ // about tree-height when constructing the N-ary addition.
+
+ Value *LastVal = nullptr;
+ bool LastValNeedNeg = false;
+
+ // Iterate the addends, creating fadd/fsub using adjacent two addends.
+ for (const FAddend *Opnd : Opnds) {
+ bool NeedNeg;
+ Value *V = createAddendVal(*Opnd, NeedNeg);
+ if (!LastVal) {
+ LastVal = V;
+ LastValNeedNeg = NeedNeg;
+ continue;
+ }
+
+ if (LastValNeedNeg == NeedNeg) {
+ LastVal = createFAdd(LastVal, V);
+ continue;
+ }
+
+ if (LastValNeedNeg)
+ LastVal = createFSub(V, LastVal);
+ else
+ LastVal = createFSub(LastVal, V);
+
+ LastValNeedNeg = false;
+ }
+
+ if (LastValNeedNeg) {
+ LastVal = createFNeg(LastVal);
+ }
+
+ #ifndef NDEBUG
+ assert(CreateInstrNum == InstrNeeded &&
+ "Inconsistent in instruction numbers");
+ #endif
+
+ return LastVal;
+}
+
+Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFSub(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFNeg(Value *V) {
+ Value *Zero = cast<Value>(ConstantFP::getZeroValueForNegation(V->getType()));
+ Value *NewV = createFSub(Zero, V);
+ if (Instruction *I = dyn_cast<Instruction>(NewV))
+ createInstPostProc(I, true); // fneg's don't receive instruction numbers.
+ return NewV;
+}
+
+Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFMul(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
+ NewInstr->setDebugLoc(Instr->getDebugLoc());
+
+ // Keep track of the number of instruction created.
+ if (!NoNumber)
+ incCreateInstNum();
+
+ // Propagate fast-math flags
+ NewInstr->setFastMathFlags(Instr->getFastMathFlags());
+}
+
+// Return the number of instruction needed to emit the N-ary addition.
+// NOTE: Keep this function in sync with createAddendVal().
+unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
+ unsigned OpndNum = Opnds.size();
+ unsigned InstrNeeded = OpndNum - 1;
+
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
+
+ // Adjust the number of instructions needed to emit the N-ary add.
+ for (const FAddend *Opnd : Opnds) {
+ if (Opnd->isConstant())
+ continue;
+
+ const FAddendCoef &CE = Opnd->getCoef();
+ if (CE.isMinusOne() || CE.isMinusTwo())
+ NegOpndNum++;
+
+ // Let the addend be "c * x". If "c == +/-1", the value of the addend
+ // is immediately available; otherwise, it needs exactly one instruction
+ // to evaluate the value.
+ if (!CE.isMinusOne() && !CE.isOne())
+ InstrNeeded++;
+ }
+ if (NegOpndNum == OpndNum)
+ InstrNeeded++;
+ return InstrNeeded;
+}
+
+// Input Addend Value NeedNeg(output)
+// ================================================================
+// Constant C C false
+// <+/-1, V> V coefficient is -1
+// <2/-2, V> "fadd V, V" coefficient is -2
+// <C, V> "fmul V, C" false
+//
+// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
+Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
+ const FAddendCoef &Coeff = Opnd.getCoef();
+
+ if (Opnd.isConstant()) {
+ NeedNeg = false;
+ return Coeff.getValue(Instr->getType());
+ }
+
+ Value *OpndVal = Opnd.getSymVal();
+
+ if (Coeff.isMinusOne() || Coeff.isOne()) {
+ NeedNeg = Coeff.isMinusOne();
+ return OpndVal;
+ }
+
+ if (Coeff.isTwo() || Coeff.isMinusTwo()) {
+ NeedNeg = Coeff.isMinusTwo();
+ return createFAdd(OpndVal, OpndVal);
+ }
+
+ NeedNeg = false;
+ return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
+}
+
+// If one of the operands only has one non-zero bit, and if the other
+// operand has a known-zero bit in a more significant place than it (not
+// including the sign bit) the ripple may go up to and fill the zero, but
+// won't change the sign. For example, (X & ~4) + 1.
+static bool checkRippleForAdd(const APInt &Op0KnownZero,
+ const APInt &Op1KnownZero) {
+ APInt Op1MaybeOne = ~Op1KnownZero;
+ // Make sure that one of the operand has at most one bit set to 1.
+ if (Op1MaybeOne.countPopulation() != 1)
+ return false;
+
+ // Find the most significant known 0 other than the sign bit.
+ int BitWidth = Op0KnownZero.getBitWidth();
+ APInt Op0KnownZeroTemp(Op0KnownZero);
+ Op0KnownZeroTemp.clearBit(BitWidth - 1);
+ int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1;
+
+ int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1;
+ assert(Op1OnePosition >= 0);
+
+ // This also covers the case of no known zero, since in that case
+ // Op0ZeroPosition is -1.
+ return Op0ZeroPosition >= Op1OnePosition;
+}
+
+/// Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
+ Instruction &CxtI) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // If LHS and RHS each have at least two sign bits, the addition will look
+ // like
+ //
+ // XX..... +
+ // YY.....
+ //
+ // If the carry into the most significant position is 0, X and Y can't both
+ // be 1 and therefore the carry out of the addition is also 0.
+ //
+ // If the carry into the most significant position is 1, X and Y can't both
+ // be 0 and therefore the carry out of the addition is also 1.
+ //
+ // Since the carry into the most significant position is always equal to
+ // the carry out of the addition, there is no signed overflow.
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
+ return true;
+
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
+
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
+
+ // Addition of two 2's compliment numbers having opposite signs will never
+ // overflow.
+ if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) ||
+ (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1]))
+ return true;
+
+ // Check if carry bit of addition will not cause overflow.
+ if (checkRippleForAdd(LHSKnownZero, RHSKnownZero))
+ return true;
+ if (checkRippleForAdd(RHSKnownZero, LHSKnownZero))
+ return true;
+
+ return false;
+}
+
+/// \brief Return true if we can prove that:
+/// (sub LHS, RHS) === (sub nsw LHS, RHS)
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+/// TODO: Handle this for Vectors.
+bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
+ Instruction &CxtI) {
+ // If LHS and RHS each have at least two sign bits, the subtraction
+ // cannot overflow.
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
+ return true;
+
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
+
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
+
+ // Subtraction of two 2's compliment numbers having identical signs will
+ // never overflow.
+ if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) ||
+ (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1]))
+ return true;
+
+ // TODO: implement logic similar to checkRippleForAdd
+ return false;
+}
+
+/// \brief Return true if we can prove that:
+/// (sub LHS, RHS) === (sub nuw LHS, RHS)
+bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
+ Instruction &CxtI) {
+ // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ if (LHSKnownNegative && RHSKnownNonNegative)
+ return true;
+
+ return false;
+}
+
+// Checks if any operand is negative and we can convert add to sub.
+// This function checks for following negative patterns
+// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
+// ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
+// XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
+static Value *checkForNegativeOperand(BinaryOperator &I,
+ InstCombiner::BuilderTy *Builder) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ // This function creates 2 instructions to replace ADD, we need at least one
+ // of LHS or RHS to have one use to ensure benefit in transform.
+ if (!LHS->hasOneUse() && !RHS->hasOneUse())
+ return nullptr;
+
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+ const APInt *C1 = nullptr, *C2 = nullptr;
+
+ // if ONE is on other side, swap
+ if (match(RHS, m_Add(m_Value(X), m_One())))
+ std::swap(LHS, RHS);
+
+ if (match(LHS, m_Add(m_Value(X), m_One()))) {
+ // if XOR on other side, swap
+ if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
+ std::swap(X, RHS);
+
+ if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) {
+ // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
+ // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
+ if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
+ Value *NewAnd = Builder->CreateAnd(Z, *C1);
+ return Builder->CreateSub(RHS, NewAnd, "sub");
+ } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
+ // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
+ // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
+ Value *NewOr = Builder->CreateOr(Z, ~(*C1));
+ return Builder->CreateSub(RHS, NewOr, "sub");
+ }
+ }
+ }
+
+ // Restore LHS and RHS
+ LHS = I.getOperand(0);
+ RHS = I.getOperand(1);
+
+ // if XOR is on other side, swap
+ if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
+ std::swap(LHS, RHS);
+
+ // C2 is ODD
+ // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
+ // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
+ if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
+ if (C1->countTrailingZeros() == 0)
+ if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
+ Value *NewOr = Builder->CreateOr(Z, ~(*C2));
+ return Builder->CreateSub(RHS, NewOr, "sub");
+ }
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ const APInt *Val;
+ if (match(RHS, m_APInt(Val))) {
+ // X + (signbit) --> X ^ signbit
+ if (Val->isSignBit())
+ return BinaryOperator::CreateXor(LHS, RHS);
+ }
+
+ // FIXME: Use the match above instead of dyn_cast to allow these transforms
+ // for splat vectors.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
+
+ Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr;
+ if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
+ const APInt &RHSVal = CI->getValue();
+ unsigned ExtendAmt = 0;
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if (XorRHS->getValue() == -RHSVal) {
+ if (RHSVal.isPowerOf2())
+ ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
+ else if (XorRHS->getValue().isPowerOf2())
+ ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
+ }
+
+ if (ExtendAmt) {
+ APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
+ if (!MaskedValueIsZero(XorLHS, Mask, 0, &I))
+ ExtendAmt = 0;
+ }
+
+ if (ExtendAmt) {
+ Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
+ Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+ return BinaryOperator::CreateAShr(NewShl, ShAmt);
+ }
+
+ // If this is a xor that was canonicalized from a sub, turn it back into
+ // a sub and fuse this add with it.
+ if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
+ IntegerType *IT = cast<IntegerType>(I.getType());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I);
+ if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
+ XorLHS);
+ }
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
+ }
+ }
+
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ if (I.getType()->getScalarType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // X + X --> X << 1
+ if (LHS == RHS) {
+ BinaryOperator *New =
+ BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (!isa<Constant>(RHS))
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+ if (Value *V = checkForNegativeOperand(I, Builder))
+ return replaceInstUsesWith(I, V);
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (haveNoCommonBitsSet(LHS, RHS, DL, AC, &I, DT))
+ return BinaryOperator::CreateOr(LHS, RHS);
+
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Value *X;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+ }
+
+ // FIXME: We already did a check for ConstantInt RHS above this.
+ // FIXME: Is this pattern covered by another fold? No regression tests fail on
+ // removal.
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ Value *X;
+ ConstantInt *C2;
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+ CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+ // Insert the new, smaller add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at last one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // (add (xor A, B) (and A, B)) --> (or A, B)
+ {
+ Value *A = nullptr, *B = nullptr;
+ if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(LHS, m_And(m_Specific(B), m_Specific(A)))))
+ return BinaryOperator::CreateOr(A, B);
+
+ if (match(LHS, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(RHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(RHS, m_And(m_Specific(B), m_Specific(A)))))
+ return BinaryOperator::CreateOr(A, B);
+ }
+
+ // (add (or A, B) (and A, B)) --> (add A, B)
+ {
+ Value *A = nullptr, *B = nullptr;
+ if (match(RHS, m_Or(m_Value(A), m_Value(B))) &&
+ (match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(LHS, m_And(m_Specific(B), m_Specific(A))))) {
+ auto *New = BinaryOperator::CreateAdd(A, B);
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+
+ if (match(LHS, m_Or(m_Value(A), m_Value(B))) &&
+ (match(RHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(RHS, m_And(m_Specific(B), m_Specific(A))))) {
+ auto *New = BinaryOperator::CreateAdd(A, B);
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+ }
+
+ // TODO(jingyue): Consider WillNotOverflowSignedAdd and
+ // WillNotOverflowUnsignedAdd to reduce the number of invocations of
+ // computeKnownBits.
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
+ Changed = true;
+ I.setHasNoSignedWrap(true);
+ }
+ if (!I.hasNoUnsignedWrap() &&
+ computeOverflowForUnsignedAdd(LHS, RHS, &I) ==
+ OverflowResult::NeverOverflows) {
+ Changed = true;
+ I.setHasNoUnsignedWrap(true);
+ }
+
+ return Changed ? &I : nullptr;
+}
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V =
+ SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (isa<Constant>(RHS)) {
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *NV = FoldOpIntoSelect(I, SI))
+ return NV;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS)) {
+ Instruction *RI = BinaryOperator::CreateFSub(RHS, LHSV);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS)) {
+ Instruction *RI = BinaryOperator::CreateFSub(LHS, V);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+ // Only do this if x/y have the same type, if at last one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=nullptr, *Z2=nullptr;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
+ if (I.hasUnsafeAlgebra()) {
+ if (Value *V = FAddCombine(Builder).simplify(&I))
+ return replaceInstUsesWith(I, V);
+ }
+
+ return Changed ? &I : nullptr;
+}
+
+/// Optimize pointer differences into the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ Type *Ty) {
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped = false;
+ GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
+
+ // For now we require one side to be the base pointer "A" or a constant
+ // GEP derived from it.
+ if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // (gep X, ...) - X
+ if (LHSGEP->getOperand(0) == RHS) {
+ GEP1 = LHSGEP;
+ Swapped = false;
+ } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (LHSGEP->getOperand(0)->stripPointerCasts() ==
+ RHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = RHSGEP;
+ GEP1 = LHSGEP;
+ Swapped = false;
+ }
+ }
+ }
+
+ if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ // X - (gep X, ...)
+ if (RHSGEP->getOperand(0) == LHS) {
+ GEP1 = RHSGEP;
+ Swapped = true;
+ } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (RHSGEP->getOperand(0)->stripPointerCasts() ==
+ LHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = LHSGEP;
+ GEP1 = RHSGEP;
+ Swapped = true;
+ }
+ }
+ }
+
+ // Avoid duplicating the arithmetic if GEP2 has non-constant indices and
+ // multiple users.
+ if (!GEP1 ||
+ (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
+ return nullptr;
+
+ // Emit the offset of the GEP and an intptr_t.
+ Value *Result = EmitGEPOffset(GEP1);
+
+ // If we had a constant expression GEP on the other side offsetting the
+ // pointer, subtract it from the offset we have.
+ if (GEP2) {
+ Value *Offset = EmitGEPOffset(GEP2);
+ Result = Builder->CreateSub(Result, Offset);
+ }
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // (A*B)-(A*C) -> A*(B-C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // If this is a 'B = x-(-A)', change to B = x+A.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+
+ if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
+ assert(BO->getOpcode() == Instruction::Sub &&
+ "Expected a subtraction operator!");
+ if (BO->hasNoSignedWrap() && I.hasNoSignedWrap())
+ Res->setHasNoSignedWrap(true);
+ } else {
+ if (cast<Constant>(Op1)->isNotMinSignedValue() && I.hasNoSignedWrap())
+ Res->setHasNoSignedWrap(true);
+ }
+
+ return Res;
+ }
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ // Replace (-1 - A) with (~A).
+ if (match(Op0, m_AllOnes()))
+ return BinaryOperator::CreateNot(Op1);
+
+ if (Constant *C = dyn_cast<Constant>(Op0)) {
+ // C - ~X == X + (1+C)
+ Value *X = nullptr;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ // C-(X+C2) --> (C-C2)-X
+ Constant *C2;
+ if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
+ if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X))))
+ if (X->getType()->getScalarType()->isIntegerTy(1))
+ return CastInst::CreateSExtOrBitCast(X, Op1->getType());
+
+ // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
+ if (C->isNullValue() && match(Op1, m_SExt(m_Value(X))))
+ if (X->getType()->getScalarType()->isIntegerTy(1))
+ return CastInst::CreateZExtOrBitCast(X, Op1->getType());
+ }
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ Value *X;
+ ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
+ return BinaryOperator::CreateLShr(X, CI);
+ }
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ APInt IntVal = C->getValue();
+ if ((IntVal + 1).isPowerOf2()) {
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(&I, KnownZero, KnownOne, 0, &I);
+ if ((IntVal | KnownZero).isAllOnesValue()) {
+ return BinaryOperator::CreateXor(Op1, C);
+ }
+ }
+ }
+
+ {
+ Value *Y;
+ // X-(X+Y) == -Y X-(Y+X) == -Y
+ if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+ match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ return BinaryOperator::CreateNeg(Y);
+
+ // (X-Y)-X == -Y
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Y);
+ }
+
+ // (sub (or A, B) (xor A, B)) --> (and A, B)
+ {
+ Value *A = nullptr, *B = nullptr;
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(Op0, m_Or(m_Specific(A), m_Specific(B))) ||
+ match(Op0, m_Or(m_Specific(B), m_Specific(A)))))
+ return BinaryOperator::CreateAnd(A, B);
+ }
+
+ if (Op0->hasOneUse()) {
+ Value *Y = nullptr;
+ // ((X | Y) - X) --> (~X & Y)
+ if (match(Op0, m_Or(m_Value(Y), m_Specific(Op1))) ||
+ match(Op0, m_Or(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateAnd(
+ Y, Builder->CreateNot(Op1, Op1->getName() + ".not"));
+ }
+
+ if (Op1->hasOneUse()) {
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+ Constant *C = nullptr;
+ Constant *CI = nullptr;
+
+ // (X - (Y - Z)) --> (X + (Z - Y)).
+ if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+ return BinaryOperator::CreateAdd(Op0,
+ Builder->CreateSub(Z, Y, Op1->getName()));
+
+ // (X - (X & Y)) --> (X & ~Y)
+ //
+ if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+ match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ return BinaryOperator::CreateAnd(Op0,
+ Builder->CreateNot(Y, Y->getName() + ".not"));
+
+ // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
+ C->isNotMinSignedValue() && !C->isOneValue())
+ return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+ // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+ if (Value *XNeg = dyn_castNegVal(X))
+ return BinaryOperator::CreateShl(XNeg, Y);
+
+ // X - A*-B -> X + A*B
+ // X - -A*B -> X + A*B
+ Value *A, *B;
+ if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+ match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+
+ // X - A*CI -> X + A*-CI
+ // X - CI*A -> X + A*-CI
+ if (match(Op1, m_Mul(m_Value(A), m_Constant(CI))) ||
+ match(Op1, m_Mul(m_Constant(CI), m_Value(A)))) {
+ Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ return BinaryOperator::CreateAdd(Op0, NewMul);
+ }
+ }
+
+ // Optimize pointer differences into the same array into a size. Consider:
+ // &A[10] - &A[0]: we should compile this to "10".
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return replaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return replaceInstUsesWith(I, Res);
+
+ bool Changed = false;
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
+ Changed = true;
+ I.setHasNoSignedWrap(true);
+ }
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
+ Changed = true;
+ I.setHasNoUnsignedWrap(true);
+ }
+
+ return Changed ? &I : nullptr;
+}
+
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V =
+ SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // fsub nsz 0, X ==> fsub nsz -0.0, X
+ if (I.getFastMathFlags().noSignedZeros() && match(Op0, m_Zero())) {
+ // Subtraction from -0.0 is the canonical form of fneg.
+ Instruction *NewI = BinaryOperator::CreateFNeg(Op1);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *NV = FoldOpIntoSelect(I, SI))
+ return NV;
+
+ // If this is a 'B = x-(-A)', change to B = x+A, potentially looking
+ // through FP extensions/truncations along the way.
+ if (Value *V = dyn_castFNegVal(Op1)) {
+ Instruction *NewI = BinaryOperator::CreateFAdd(Op0, V);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+ if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) {
+ if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) {
+ Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType());
+ Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+ } else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) {
+ if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) {
+ Value *NewExt = Builder->CreateFPExt(V, I.getType());
+ Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+ }
+
+ if (I.hasUnsafeAlgebra()) {
+ if (Value *V = FAddCombine(Builder).simplify(&I))
+ return replaceInstUsesWith(I, V);
+ }
+
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
new file mode 100644
index 000000000000..1a6459b3d689
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -0,0 +1,2774 @@
+//===- InstCombineAndOrXor.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitAnd, visitOr, and visitXor functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+static inline Value *dyn_castNotVal(Value *V) {
+ // If this is not(not(x)) don't return that this is a not: we want the two
+ // not's to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!IsFreeToInvert(Operand, Operand->hasOneUse()))
+ return Operand;
+ }
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(C->getType(), ~C->getValue());
+ return nullptr;
+}
+
+/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
+/// a four bit mask.
+static unsigned getFCmpCode(FCmpInst::Predicate CC) {
+ assert(FCmpInst::FCMP_FALSE <= CC && CC <= FCmpInst::FCMP_TRUE &&
+ "Unexpected FCmp predicate!");
+ // Take advantage of the bit pattern of FCmpInst::Predicate here.
+ // U L G E
+ static_assert(FCmpInst::FCMP_FALSE == 0, ""); // 0 0 0 0
+ static_assert(FCmpInst::FCMP_OEQ == 1, ""); // 0 0 0 1
+ static_assert(FCmpInst::FCMP_OGT == 2, ""); // 0 0 1 0
+ static_assert(FCmpInst::FCMP_OGE == 3, ""); // 0 0 1 1
+ static_assert(FCmpInst::FCMP_OLT == 4, ""); // 0 1 0 0
+ static_assert(FCmpInst::FCMP_OLE == 5, ""); // 0 1 0 1
+ static_assert(FCmpInst::FCMP_ONE == 6, ""); // 0 1 1 0
+ static_assert(FCmpInst::FCMP_ORD == 7, ""); // 0 1 1 1
+ static_assert(FCmpInst::FCMP_UNO == 8, ""); // 1 0 0 0
+ static_assert(FCmpInst::FCMP_UEQ == 9, ""); // 1 0 0 1
+ static_assert(FCmpInst::FCMP_UGT == 10, ""); // 1 0 1 0
+ static_assert(FCmpInst::FCMP_UGE == 11, ""); // 1 0 1 1
+ static_assert(FCmpInst::FCMP_ULT == 12, ""); // 1 1 0 0
+ static_assert(FCmpInst::FCMP_ULE == 13, ""); // 1 1 0 1
+ static_assert(FCmpInst::FCMP_UNE == 14, ""); // 1 1 1 0
+ static_assert(FCmpInst::FCMP_TRUE == 15, ""); // 1 1 1 1
+ return CC;
+}
+
+/// This is the complement of getICmpCode, which turns an opcode and two
+/// operands into either a constant true or false, or a brand new ICmp
+/// instruction. The sign is passed in to determine which kind of predicate to
+/// use in the new icmp instruction.
+static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate NewPred;
+ if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
+ return NewConstant;
+ return Builder->CreateICmp(NewPred, LHS, RHS);
+}
+
+/// This is the complement of getFCmpCode, which turns an opcode and two
+/// operands into either a FCmp instruction, or a true/false constant.
+static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ const auto Pred = static_cast<FCmpInst::Predicate>(Code);
+ assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
+ "Unexpected FCmp predicate!");
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ return Builder->CreateFCmp(Pred, LHS, RHS);
+}
+
+/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B))
+/// \param I Binary operator to transform.
+/// \return Pointer to node that must replace the original binary operator, or
+/// null pointer if no transformation was made.
+Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+
+ // Can't do vectors.
+ if (I.getType()->isVectorTy()) return nullptr;
+
+ // Can only do bitwise ops.
+ unsigned Op = I.getOpcode();
+ if (Op != Instruction::And && Op != Instruction::Or &&
+ Op != Instruction::Xor)
+ return nullptr;
+
+ Value *OldLHS = I.getOperand(0);
+ Value *OldRHS = I.getOperand(1);
+ ConstantInt *ConstLHS = dyn_cast<ConstantInt>(OldLHS);
+ ConstantInt *ConstRHS = dyn_cast<ConstantInt>(OldRHS);
+ IntrinsicInst *IntrLHS = dyn_cast<IntrinsicInst>(OldLHS);
+ IntrinsicInst *IntrRHS = dyn_cast<IntrinsicInst>(OldRHS);
+ bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap);
+ bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap);
+
+ if (!IsBswapLHS && !IsBswapRHS)
+ return nullptr;
+
+ if (!IsBswapLHS && !ConstLHS)
+ return nullptr;
+
+ if (!IsBswapRHS && !ConstRHS)
+ return nullptr;
+
+ /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+ /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+ Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) :
+ Builder->getInt(ConstLHS->getValue().byteSwap());
+
+ Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) :
+ Builder->getInt(ConstRHS->getValue().byteSwap());
+
+ Value *BinOp = nullptr;
+ if (Op == Instruction::And)
+ BinOp = Builder->CreateAnd(NewLHS, NewRHS);
+ else if (Op == Instruction::Or)
+ BinOp = Builder->CreateOr(NewLHS, NewRHS);
+ else //if (Op == Instruction::Xor)
+ BinOp = Builder->CreateXor(NewLHS, NewRHS);
+
+ Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
+ return Builder->CreateCall(F, BinOp);
+}
+
+/// This handles expressions of the form ((val OP C1) & C2). Where
+/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+/// guaranteed to be a binary operator.
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = nullptr;
+ if (!Op->isShift())
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Value *And = Builder->CreateAnd(X, AndRHS);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Op->hasOneUse()){
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+ if (TogetherCI && !TogetherCI->isZero()){
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
+ }
+
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding a one to a single bit bit-field should be turned into an XOR
+ // of the bit. First thing to check is to see if this AND is with a
+ // single bit constant.
+ const APInt &AndRHSV = AndRHS->getValue();
+
+ // If there is only one bit set.
+ if (AndRHSV.isPowerOf2()) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = OpRHS->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can effect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask)
+ // Masking out bits that the shift already masks.
+ return replaceInstUsesWith(TheAnd, Op); // No need for the and.
+
+ if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask)
+ // Masking out bits that the shift already masks.
+ return replaceInstUsesWith(TheAnd, Op);
+
+ if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return nullptr;
+}
+
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// whether to treat the V, Lo and HI as signed or not. IB is the location to
+/// insert new instructions.
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return Builder->getFalse();
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpULT(Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return Builder->getTrue();
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpUGT(Add, LowerBound);
+}
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB,
+/// so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+/// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // look for the first zero bit after the run of ones
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // look for the first non-zero bit
+ ME = V.getActiveBits();
+ return true;
+}
+
+/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
+/// whether the operator is a sub. If we can fold one of the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return nullptr;
+ case Instruction::And:
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+ // part, we don't need any explicit masks to take them out of A. If that
+ // is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask, 0, &I))
+ break;
+ }
+ }
+ return nullptr;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
+ break;
+ return nullptr;
+ }
+
+ if (isSub)
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
+
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven, that (A & C) == C. This
+/// is trivial if C == A, or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares, that the comparison is true only
+/// if (A & B) == A, or all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares, that the comparison is true only
+/// if (A & B) == 0, or all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares, that (A & B) == C and C might or might not
+/// contain any number of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The Part "Not" means, that in above descriptions "==" should be replaced
+/// by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
+
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies.
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+ result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ } else if (ACst && CCst &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C) {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ } else if (BCst && CCst &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
+
+/// Convert an analysis of a masked ICmp into its equivalent if all boolean
+/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
+/// is adjacent to the corresponding normal flag (recording ==), this just
+/// involves swapping those bits over.
+static unsigned conjugateICmpMask(unsigned Mask) {
+ unsigned NewMask;
+ NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed))
+ << 1;
+
+ NewMask |=
+ (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed))
+ >> 1;
+
+ return NewMask;
+}
+
+/// Decompose an icmp into the form ((X & Y) pred Z) if possible.
+/// The returned predicate is either == or !=. Returns false if
+/// decomposition fails.
+static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z) {
+ ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!C)
+ return false;
+
+ switch (I->getPredicate()) {
+ default:
+ return false;
+ case ICmpInst::ICMP_SLT:
+ // X < 0 is equivalent to (X & SignBit) != 0.
+ if (!C->isZero())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // X > -1 is equivalent to (X & SignBit) == 0.
+ if (!C->isAllOnesValue())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
+ if (!C->getValue().isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), -C->getValue());
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
+ if (!(C->getValue() + 1).isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), ~C->getValue());
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+
+ X = I->getOperand(0);
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
+}
+
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy.
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate &LHSCC,
+ ICmpInst::Predicate &RHSCC) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // and L11 & L12 == L21 & L22. The same goes for RHS.
+ // Now we must find those components L** and R**, that are equal, so
+ // that we can extract the parameters A, B, C, D, and E for the canonical
+ // above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ // Check whether the icmp can be decomposed into a bit test.
+ if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
+ L21 = L22 = L1 = nullptr;
+ } else {
+ // Look for ANDs in the LHS icmp.
+ if (!L1->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
+ L11 = L12 = nullptr;
+ } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ // Any icmp can be viewed as being trivially masked; if it allows us to
+ // remove one, it's worth it.
+ L11 = L1;
+ L12 = Constant::getAllOnesValue(L1->getType());
+ }
+
+ if (!L2->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
+ L21 = L22 = nullptr;
+ } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
+ L21 = L2;
+ L22 = Constant::getAllOnesValue(L2->getType());
+ }
+ }
+
+ // Bail if LHS was a icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(LHSCC))
+ return 0;
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11;
+ } else {
+ return 0;
+ }
+ E = R2; R1 = nullptr; ok = true;
+ } else if (R1->getType()->isIntegerTy()) {
+ if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ // As before, model no mask as a trivial mask if it'll let us do an
+ // optimization.
+ R11 = R1;
+ R12 = Constant::getAllOnesValue(R1->getType());
+ }
+
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R2; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+
+ // Bail if RHS was a icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(RHSCC))
+ return 0;
+
+ // Look for ANDs on the right side of the RHS icmp.
+ if (!ok && R2->getType()->isIntegerTy()) {
+ if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ R11 = R2;
+ R12 = Constant::getAllOnesValue(R2->getType());
+ }
+
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R1; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11; E = R1; ok = true;
+ } else {
+ return 0;
+ }
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ } else if (L12 == A) {
+ B = L11; C = L2;
+ } else if (L21 == A) {
+ B = L22; C = L1;
+ } else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned LeftType = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned RightType = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return LeftType & RightType;
+}
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y).
+static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ llvm::InstCombiner::BuilderTy *Builder) {
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ unsigned Mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
+ LHSCC, RHSCC);
+ if (Mask == 0) return nullptr;
+ assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
+ "foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
+
+ // In full generality:
+ // (icmp (A & B) Op C) | (icmp (A & D) Op E)
+ // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
+ //
+ // If the latter can be converted into (icmp (A & X) Op Y) then the former is
+ // equivalent to (icmp (A & X) !Op Y).
+ //
+ // Therefore, we can pretend for the rest of this function that we're dealing
+ // with the conjunction, provided we flip the sense of any comparisons (both
+ // input and output).
+
+ // In most cases we're going to produce an EQ for the "&&" case.
+ ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ if (!IsAnd) {
+ // Convert the masking analysis into its equivalent with negated
+ // comparisons.
+ Mask = conjugateICmpMask(Mask);
+ }
+
+ if (Mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value *NewOr = Builder->CreateOr(B, D);
+ Value *NewAnd = Builder->CreateAnd(A, NewOr);
+ // We can't use C as zero because we might actually handle
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // with B and D, having a single bit set.
+ Value *Zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NewCC, NewAnd, Zero);
+ }
+ if (Mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value *NewOr = Builder->CreateOr(B, D);
+ Value *NewAnd = Builder->CreateAnd(A, NewOr);
+ return Builder->CreateICmp(NewCC, NewAnd, NewOr);
+ }
+ if (Mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value *NewAnd1 = Builder->CreateAnd(B, D);
+ Value *NewAnd2 = Builder->CreateAnd(A, NewAnd1);
+ return Builder->CreateICmp(NewCC, NewAnd2, A);
+ }
+
+ // Remaining cases assume at least that B and D are constant, and depend on
+ // their actual values. This isn't strictly necessary, just a "handle the
+ // easy cases for now" decision.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (!BCst) return nullptr;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (!DCst) return nullptr;
+
+ if (Mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+ // Only valid if one of the masks is a superset of the other (check "B&D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() & DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
+ if (Mask & FoldMskICmp_AMask_NotAllOnes) {
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
+ // Only valid if one of the masks is a superset of the other (check "B|D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() | DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
+ if (Mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ // We can't simply use C and E because we might actually handle
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // with B and D, having a single bit set.
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (!CCst) return nullptr;
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (!ECst) return nullptr;
+ if (LHSCC != NewCC)
+ CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
+ if (RHSCC != NewCC)
+ ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+ // If there is a conflict, we should actually return a false for the
+ // whole construct.
+ if (((BCst->getValue() & DCst->getValue()) &
+ (CCst->getValue() ^ ECst->getValue())) != 0)
+ return ConstantInt::get(LHS->getType(), !IsAnd);
+ Value *NewOr1 = Builder->CreateOr(B, D);
+ Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value *NewAnd = Builder->CreateAnd(A, NewOr1);
+ return Builder->CreateICmp(NewCC, NewAnd, NewOr2);
+ }
+ return nullptr;
+}
+
+/// Try to fold a signed range checked with lower bound 0 to an unsigned icmp.
+/// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
+/// If \p Inverted is true then the check is for the inverted range, e.g.
+/// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
+Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool Inverted) {
+ // Check the lower range comparison, e.g. x >= 0
+ // InstCombine already ensured that if there is a constant it's on the RHS.
+ ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1));
+ if (!RangeStart)
+ return nullptr;
+
+ ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() :
+ Cmp0->getPredicate());
+
+ // Accept x > -1 or x >= 0 (after potentially inverting the predicate).
+ if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) ||
+ (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero())))
+ return nullptr;
+
+ ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() :
+ Cmp1->getPredicate());
+
+ Value *Input = Cmp0->getOperand(0);
+ Value *RangeEnd;
+ if (Cmp1->getOperand(0) == Input) {
+ // For the upper range compare we have: icmp x, n
+ RangeEnd = Cmp1->getOperand(1);
+ } else if (Cmp1->getOperand(1) == Input) {
+ // For the upper range compare we have: icmp n, x
+ RangeEnd = Cmp1->getOperand(0);
+ Pred1 = ICmpInst::getSwappedPredicate(Pred1);
+ } else {
+ return nullptr;
+ }
+
+ // Check the upper range comparison, e.g. x < n
+ ICmpInst::Predicate NewPred;
+ switch (Pred1) {
+ case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break;
+ case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break;
+ default: return nullptr;
+ }
+
+ // This simplification is only valid if the upper range is not negative.
+ bool IsNegative, IsNotNegative;
+ ComputeSignBit(RangeEnd, IsNotNegative, IsNegative, /*Depth=*/0, Cmp1);
+ if (!IsNotNegative)
+ return nullptr;
+
+ if (Inverted)
+ NewPred = ICmpInst::getInversePredicate(NewPred);
+
+ return Builder->CreateICmp(NewPred, Input, RangeEnd);
+}
+
+/// Fold (icmp)&(icmp) if possible.
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
+ return V;
+
+ // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
+ if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/false))
+ return V;
+
+ // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
+ if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (!LHSCst || !RHSCst) return nullptr;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2 or
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if ((LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) ||
+ (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero())) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+ // where CMAX is the all ones value for the truncated type,
+ // iff the lower bits of C2 and CA are zero.
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
+ LHS->hasOneUse() && RHS->hasOneUse()) {
+ Value *V;
+ ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr;
+
+ // (trunc x) == C1 & (and x, CA) == C2
+ // (and x, CA) == C2 & (trunc x) == C1
+ if (match(Val2, m_Trunc(m_Value(V))) &&
+ match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = RHSCst;
+ BigCst = LHSCst;
+ } else if (match(Val, m_Trunc(m_Value(V))) &&
+ match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = LHSCst;
+ BigCst = RHSCst;
+ }
+
+ if (SmallCst && BigCst) {
+ unsigned BigBitSize = BigCst->getType()->getBitWidth();
+ unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+ // Check that the low bits are zero.
+ APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+ if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+ Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+ APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+ Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+ return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+ }
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return nullptr;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return nullptr;
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return nullptr;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and and'ing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
+ // (from the icmp folding check above), that the two constants
+ // are not equal and that the larger constant is on the RHS
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return LHS;
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return Builder->CreateICmpULT(Val, LHSCst);
+ if (LHSCst->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return Builder->CreateICmpSLT(Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_NE:
+ // Special case to get the ordering right when the values wrap around
+ // zero.
+ if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue())
+ std::swap(LHSCst, RHSCst);
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1),
+ Val->getName()+".cmp");
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return LHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return LHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return RHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return nullptr;
+}
+
+/// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ // Suppose the relation between x and y is R, where R is one of
+ // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for
+ // testing the desired relations.
+ //
+ // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
+ // bool(R & CC0) && bool(R & CC1)
+ // = bool((R & CC0) & (R & CC1))
+ // = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS)
+ return getFCmpValue(getFCmpCode(Op0CC) & getFCmpCode(Op1CC), Op0LHS, Op0RHS,
+ Builder);
+
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+ return nullptr;
+
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return Builder->getFalse();
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+/// Match De Morgan's Laws:
+/// (~A & ~B) == (~(A | B))
+/// (~A | ~B) == (~(A & B))
+static Instruction *matchDeMorgansLaws(BinaryOperator &I,
+ InstCombiner::BuilderTy *Builder) {
+ auto Opcode = I.getOpcode();
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Trying to match De Morgan's Laws with something other than and/or");
+ // Flip the logic operation.
+ if (Opcode == Instruction::And)
+ Opcode = Instruction::Or;
+ else
+ Opcode = Instruction::And;
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ // TODO: Use pattern matchers instead of dyn_cast.
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal,
+ I.getName() + ".demorgan");
+ return BinaryOperator::CreateNot(LogicOp);
+ }
+
+ // De Morgan's Law in disguise:
+ // (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B))
+ // (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B))
+ Value *A = nullptr;
+ Value *B = nullptr;
+ ConstantInt *C1 = nullptr;
+ if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) &&
+ match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) {
+ // TODO: This check could be loosened to handle different type sizes.
+ // Alternatively, we could fix the definition of m_Not to recognize a not
+ // operation hidden by a zext?
+ if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) &&
+ C1->isOne()) {
+ Value *LogicOp = Builder->CreateBinOp(Opcode, A, B,
+ I.getName() + ".demorgan");
+ Value *Not = Builder->CreateNot(LogicOp);
+ return CastInst::CreateZExtOrBitCast(Not, I.getType());
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
+ auto LogicOpc = I.getOpcode();
+ assert((LogicOpc == Instruction::And || LogicOpc == Instruction::Or ||
+ LogicOpc == Instruction::Xor) &&
+ "Unexpected opcode for bitwise logic folding");
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ CastInst *Cast0 = dyn_cast<CastInst>(Op0);
+ if (!Cast0)
+ return nullptr;
+
+ // This must be a cast from an integer or integer vector source type to allow
+ // transformation of the logic operation to the source type.
+ Type *DestTy = I.getType();
+ Type *SrcTy = Cast0->getSrcTy();
+ if (!SrcTy->isIntOrIntVectorTy())
+ return nullptr;
+
+ // If one operand is a bitcast and the other is a constant, move the logic
+ // operation ahead of the bitcast. That is, do the logic operation in the
+ // original type. This can eliminate useless bitcasts and allow normal
+ // combines that would otherwise be impeded by the bitcast. Canonicalization
+ // ensures that if there is a constant operand, it will be the second operand.
+ Value *BC = nullptr;
+ Constant *C = nullptr;
+ if ((match(Op0, m_BitCast(m_Value(BC))) && match(Op1, m_Constant(C)))) {
+ Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy);
+ Value *NewOp = Builder->CreateBinOp(LogicOpc, BC, NewConstant, I.getName());
+ return CastInst::CreateBitOrPointerCast(NewOp, DestTy);
+ }
+
+ CastInst *Cast1 = dyn_cast<CastInst>(Op1);
+ if (!Cast1)
+ return nullptr;
+
+ // Both operands of the logic operation are casts. The casts must be of the
+ // same type for reduction.
+ auto CastOpcode = Cast0->getOpcode();
+ if (CastOpcode != Cast1->getOpcode() || SrcTy != Cast1->getSrcTy())
+ return nullptr;
+
+ Value *Cast0Src = Cast0->getOperand(0);
+ Value *Cast1Src = Cast1->getOperand(0);
+
+ // fold (logic (cast A), (cast B)) -> (cast (logic A, B))
+
+ // Only do this if the casts both really cause code to be generated.
+ if ((!isa<ICmpInst>(Cast0Src) || !isa<ICmpInst>(Cast1Src)) &&
+ ShouldOptimizeCast(CastOpcode, Cast0Src, DestTy) &&
+ ShouldOptimizeCast(CastOpcode, Cast1Src, DestTy)) {
+ Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
+ I.getName());
+ return CastInst::Create(CastOpcode, NewOp, DestTy);
+ }
+
+ // For now, only 'and'/'or' have optimizations after this.
+ if (LogicOpc == Instruction::Xor)
+ return nullptr;
+
+ // If this is logic(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
+ if (ICmp0 && ICmp1) {
+ Value *Res = LogicOpc == Instruction::And ? FoldAndOfICmps(ICmp0, ICmp1)
+ : FoldOrOfICmps(ICmp0, ICmp1, &I);
+ if (Res)
+ return CastInst::Create(CastOpcode, Res, DestTy);
+ return nullptr;
+ }
+
+ // If this is logic(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
+ FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
+ if (FCmp0 && FCmp1) {
+ Value *Res = LogicOpc == Instruction::And ? FoldAndOfFCmps(FCmp0, FCmp1)
+ : FoldOrOfFCmps(FCmp0, FCmp1);
+ if (Res)
+ return CastInst::Create(CastOpcode, Res, DestTy);
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Canonicalize SExt or Not to the LHS
+ if (match(Op1, m_SExt(m_Value())) || match(Op1, m_Not(m_Value()))) {
+ std::swap(Op0, Op1);
+ }
+
+ // Fold (and (sext bool to A), B) --> (select bool, B, 0)
+ Value *X = nullptr;
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op1->getType());
+ return SelectInst::Create(X, Op1, Zero);
+ }
+
+ // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
+ if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op0->getType());
+ return SelectInst::Create(X, Zero, Op1);
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyAndInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (Value *V = SimplifyBSwap(I))
+ return replaceInstUsesWith(I, V);
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt &AndRHSMask = AndRHS->getValue();
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Xor:
+ case Instruction::Or: {
+ // If the mask is only needed on one incoming arm, push it up.
+ if (!Op0I->hasOneUse()) break;
+
+ APInt NotAndRHS(~AndRHSMask);
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
+ }
+
+ break;
+ }
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // -x & 1 -> x & 1
+ if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
+ return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ if (MaskedValueIsZero(Op0LHS, Mask, 0, &I)) {
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ }
+
+ // If this is an integer truncation, and if the source is an 'and' with
+ // immediate, transform it. This frequently occurs for bitfield accesses.
+ {
+ Value *X = nullptr; ConstantInt *YC = nullptr;
+ if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+ // Change: and (trunc (and X, YC) to T), C2
+ // into : and (trunc X to T), trunc(YC) & C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ return DeMorgan;
+
+ {
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // A&(A^B) => A & ~B
+ {
+ Value *tmpOp0 = Op0;
+ Value *tmpOp1 = Op1;
+ if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
+ if (A == Op1 || B == Op1 ) {
+ tmpOp1 = Op0;
+ tmpOp0 = Op1;
+ // Simplify below
+ }
+ }
+
+ if (match(tmpOp1, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
+ if (B == tmpOp0) {
+ std::swap(A, B);
+ }
+ // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never get to the loop.
+ if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B));
+ }
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+
+ // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
+ if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
+ if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
+ return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C));
+
+ // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
+ if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
+ if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
+ if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
+ return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
+
+ // (A | B) & ((~A) ^ B) -> (A & B)
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateAnd(A, B);
+
+ // ((~A) ^ B) & (A | B) -> (A & B)
+ if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Or(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateAnd(A, B);
+ }
+
+ {
+ ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
+ ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
+ if (LHS && RHS)
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return replaceInstUsesWith(I, Res);
+
+ // TODO: Make this recursive; it's a little tricky because an arbitrary
+ // number of 'and' instructions might have to be created.
+ Value *X, *Y;
+ if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
+ if (auto *Cmp = dyn_cast<ICmpInst>(X))
+ if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
+ if (auto *Cmp = dyn_cast<ICmpInst>(Y))
+ if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
+ }
+ if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
+ if (auto *Cmp = dyn_cast<ICmpInst>(X))
+ if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
+ if (auto *Cmp = dyn_cast<ICmpInst>(Y))
+ if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
+ }
+ }
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return replaceInstUsesWith(I, Res);
+
+ if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
+ return CastedAnd;
+
+ if (Instruction *Select = foldBoolSextMaskToSelect(I))
+ return Select;
+
+ return Changed ? &I : nullptr;
+}
+
+/// Given an OR instruction, check to see if this is a bswap idiom. If so,
+/// insert the new intrinsic and return it.
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Look through zero extends.
+ if (Instruction *Ext = dyn_cast<ZExtInst>(Op0))
+ Op0 = Ext->getOperand(0);
+
+ if (Instruction *Ext = dyn_cast<ZExtInst>(Op1))
+ Op1 = Ext->getOperand(0);
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+ bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value()));
+
+ // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
+ bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value()));
+
+ // (A & B) | (C & D) -> bswap if possible.
+ bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
+ match(Op1, m_And(m_Value(), m_Value()));
+
+ if (!OrOfOrs && !OrOfShifts && !OrOfAnds)
+ return nullptr;
+
+ SmallVector<Instruction*, 4> Insts;
+ if (!recognizeBSwapOrBitReverseIdiom(&I, true, false, Insts))
+ return nullptr;
+ Instruction *LastInst = Insts.pop_back_val();
+ LastInst->removeFromParent();
+
+ for (auto *Inst : Insts)
+ Worklist.Add(Inst);
+ return LastInst;
+}
+
+/// If all elements of two constant vectors are 0/-1 and inverses, return true.
+static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
+ unsigned NumElts = C1->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *EltC1 = C1->getAggregateElement(i);
+ Constant *EltC2 = C2->getAggregateElement(i);
+ if (!EltC1 || !EltC2)
+ return false;
+
+ // One element must be all ones, and the other must be all zeros.
+ // FIXME: Allow undef elements.
+ if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
+ (match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
+ return false;
+ }
+ return true;
+}
+
+/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
+/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
+/// B, it can be used as the condition operand of a select instruction.
+static Value *getSelectCondition(Value *A, Value *B,
+ InstCombiner::BuilderTy &Builder) {
+ // If these are scalars or vectors of i1, A can be used directly.
+ Type *Ty = A->getType();
+ if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1))
+ return A;
+
+ // If A and B are sign-extended, look through the sexts to find the booleans.
+ Value *Cond;
+ if (match(A, m_SExt(m_Value(Cond))) &&
+ Cond->getType()->getScalarType()->isIntegerTy(1) &&
+ match(B, m_CombineOr(m_Not(m_SExt(m_Specific(Cond))),
+ m_SExt(m_Not(m_Specific(Cond))))))
+ return Cond;
+
+ // All scalar (and most vector) possibilities should be handled now.
+ // Try more matches that only apply to non-splat constant vectors.
+ if (!Ty->isVectorTy())
+ return nullptr;
+
+ // If both operands are constants, see if the constants are inverse bitmasks.
+ Constant *AC, *BC;
+ if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) &&
+ areInverseVectorBitmasks(AC, BC))
+ return ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
+
+ // If both operands are xor'd with constants using the same sexted boolean
+ // operand, see if the constants are inverse bitmasks.
+ if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) &&
+ match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) &&
+ Cond->getType()->getScalarType()->isIntegerTy(1) &&
+ areInverseVectorBitmasks(AC, BC)) {
+ AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
+ return Builder.CreateXor(Cond, AC);
+ }
+ return nullptr;
+}
+
+/// We have an expression of the form (A & C) | (B & D). Try to simplify this
+/// to "A' ? C : D", where A' is a boolean or vector of booleans.
+static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
+ InstCombiner::BuilderTy &Builder) {
+ // The potential condition of the select may be bitcasted. In that case, look
+ // through its bitcast and the corresponding bitcast of the 'not' condition.
+ Type *OrigType = A->getType();
+ Value *SrcA, *SrcB;
+ if (match(A, m_OneUse(m_BitCast(m_Value(SrcA)))) &&
+ match(B, m_OneUse(m_BitCast(m_Value(SrcB))))) {
+ A = SrcA;
+ B = SrcB;
+ }
+
+ if (Value *Cond = getSelectCondition(A, B, Builder)) {
+ // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
+ // The bitcasts will either all exist or all not exist. The builder will
+ // not create unnecessary casts if the types already match.
+ Value *BitcastC = Builder.CreateBitCast(C, A->getType());
+ Value *BitcastD = Builder.CreateBitCast(D, A->getType());
+ Value *Select = Builder.CreateSelect(Cond, BitcastC, BitcastD);
+ return Builder.CreateBitCast(Select, OrigType);
+ }
+
+ return nullptr;
+}
+
+/// Fold (icmp)|(icmp) if possible.
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ Instruction *CxtI) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+ // if K1 and K2 are a one-bit mask.
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+
+ if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+
+ BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
+ BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
+ if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() &&
+ LAnd->getOpcode() == Instruction::And &&
+ RAnd->getOpcode() == Instruction::And) {
+
+ Value *Mask = nullptr;
+ Value *Masked = nullptr;
+ if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT)) {
+ Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
+ Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
+ } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT)) {
+ Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
+ Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
+ }
+
+ if (Masked)
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask);
+ }
+ }
+
+ // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
+ // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
+ // The original condition actually refers to the following two ranges:
+ // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3]
+ // We can fold these two ranges if:
+ // 1) C1 and C2 is unsigned greater than C3.
+ // 2) The two ranges are separated.
+ // 3) C1 ^ C2 is one-bit mask.
+ // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask.
+ // This implies all values in the two ranges differ by exactly one bit.
+
+ if ((LHSCC == ICmpInst::ICMP_ULT || LHSCC == ICmpInst::ICMP_ULE) &&
+ LHSCC == RHSCC && LHSCst && RHSCst && LHS->hasOneUse() &&
+ RHS->hasOneUse() && LHSCst->getType() == RHSCst->getType() &&
+ LHSCst->getValue() == (RHSCst->getValue())) {
+
+ Value *LAdd = LHS->getOperand(0);
+ Value *RAdd = RHS->getOperand(0);
+
+ Value *LAddOpnd, *RAddOpnd;
+ ConstantInt *LAddCst, *RAddCst;
+ if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddCst))) &&
+ match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddCst))) &&
+ LAddCst->getValue().ugt(LHSCst->getValue()) &&
+ RAddCst->getValue().ugt(LHSCst->getValue())) {
+
+ APInt DiffCst = LAddCst->getValue() ^ RAddCst->getValue();
+ if (LAddOpnd == RAddOpnd && DiffCst.isPowerOf2()) {
+ ConstantInt *MaxAddCst = nullptr;
+ if (LAddCst->getValue().ult(RAddCst->getValue()))
+ MaxAddCst = RAddCst;
+ else
+ MaxAddCst = LAddCst;
+
+ APInt RRangeLow = -RAddCst->getValue();
+ APInt RRangeHigh = RRangeLow + LHSCst->getValue();
+ APInt LRangeLow = -LAddCst->getValue();
+ APInt LRangeHigh = LRangeLow + LHSCst->getValue();
+ APInt LowRangeDiff = RRangeLow ^ LRangeLow;
+ APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
+ APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
+ : RRangeLow - LRangeLow;
+
+ if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
+ RangeDiff.ugt(LHSCst->getValue())) {
+ Value *MaskCst = ConstantInt::get(LAddCst->getType(), ~DiffCst);
+
+ Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskCst);
+ Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddCst);
+ return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSCst));
+ }
+ }
+ }
+ }
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
+ return V;
+
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ if (LHS->hasOneUse() || RHS->hasOneUse()) {
+ // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
+ // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
+ Value *A = nullptr, *B = nullptr;
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) {
+ B = Val;
+ if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1))
+ A = Val2;
+ else if (RHSCC == ICmpInst::ICMP_UGT && Val == Val2)
+ A = RHS->getOperand(1);
+ }
+ // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
+ // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
+ else if (RHSCC == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+ B = Val2;
+ if (LHSCC == ICmpInst::ICMP_ULT && Val2 == LHS->getOperand(1))
+ A = Val;
+ else if (LHSCC == ICmpInst::ICMP_UGT && Val2 == Val)
+ A = LHS->getOperand(1);
+ }
+ if (A && B)
+ return Builder->CreateICmp(
+ ICmpInst::ICMP_UGE,
+ Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A);
+ }
+
+ // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
+ if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/true))
+ return V;
+
+ // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
+ if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ if (!LHSCst || !RHSCst) return nullptr;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+ // iff C2 + CA == C1.
+ if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddCst;
+ if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+ if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+ return Builder->CreateICmpULE(Val, LHSCst);
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return nullptr;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return nullptr;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return nullptr;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the result
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
+ // icmp folding check above), that the two constants are not
+ // equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHS->getOperand(0) == RHS->getOperand(0)) {
+ // if LHSCst and RHSCst differ only by one bit:
+ // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2
+ assert(LHSCst->getValue().ule(LHSCst->getValue()));
+
+ APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
+ if (Xor.isPowerOf2()) {
+ Value *Cst = Builder->getInt(Xor);
+ Value *Or = Builder->CreateOr(LHS->getOperand(0), Cst);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSCst);
+ }
+ }
+
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return RHS;
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return LHS;
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return Builder->getTrue();
+ }
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(false))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return RHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(true))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return RHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return LHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return Builder->getTrue();
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ return LHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return Builder->getTrue();
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return nullptr;
+}
+
+/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ // This is a similar transformation to the one in FoldAndOfFCmps.
+ //
+ // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
+ // bool(R & CC0) || bool(R & CC1)
+ // = bool((R & CC0) | (R & CC1))
+ // = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;)
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS)
+ return getFCmpValue(getFCmpCode(Op0CC) | getFCmpCode(Op1CC), Op0LHS, Op0RHS,
+ Builder);
+
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return Builder->getTrue();
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+/// This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return nullptr;
+
+ Value *V1 = nullptr;
+ ConstantInt *CI2 = nullptr;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return nullptr;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return nullptr;
+}
+
+/// \brief This helper function folds:
+///
+/// ((A | B) & C1) ^ (B & C2)
+///
+/// into:
+///
+/// (A & C1) ^ B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1)
+ return nullptr;
+
+ Value *V1 = nullptr;
+ ConstantInt *CI2 = nullptr;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2))))
+ return nullptr;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue())
+ return nullptr;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1);
+ return BinaryOperator::CreateXor(NewOp, V1);
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (Value *V = SimplifyBSwap(I))
+ return replaceInstUsesWith(I, V);
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = nullptr; Value *X = nullptr;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) == 0.
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ Builder->getInt(RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ Builder->getInt(C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // Given an OR instruction, check to see if this is a bswap.
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+
+ Value *A = nullptr, *B = nullptr;
+ ConstantInt *C1 = nullptr, *C2 = nullptr;
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue(), 0, &I)) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue(), 0, &I)) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // ((~A & B) | A) -> (A | B)
+ if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Specific(A)))
+ return BinaryOperator::CreateOr(A, B);
+
+ // ((A & B) | ~A) -> (~A | B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_Specific(A))))
+ return BinaryOperator::CreateOr(Builder->CreateNot(A), B);
+
+ // (A & (~B)) | (A ^ B) -> (A ^ B)
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Xor(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateXor(A, B);
+
+ // (A ^ B) | ( A & (~B)) -> (A ^ B)
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))
+ return BinaryOperator::CreateXor(A, B);
+
+ // (A & C)|(B & D)
+ Value *C = nullptr, *D = nullptr;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = nullptr, *V2 = nullptr;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ if ((C1->getValue() & C2->getValue()) == 0) {
+ // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
+ // iff (C1&C2) == 0 and (N&~C1) == 0
+ if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == B &&
+ MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N)
+ (V2 == B &&
+ MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V)
+ return BinaryOperator::CreateAnd(A,
+ Builder->getInt(C1->getValue()|C2->getValue()));
+ // Or commutes, try both ways.
+ if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == A &&
+ MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N)
+ (V2 == A &&
+ MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V)
+ return BinaryOperator::CreateAnd(B,
+ Builder->getInt(C1->getValue()|C2->getValue()));
+
+ // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
+ // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
+ ConstantInt *C3 = nullptr, *C4 = nullptr;
+ if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
+ (C3->getValue() & ~C1->getValue()) == 0 &&
+ match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
+ (C4->getValue() & ~C2->getValue()) == 0) {
+ V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ return BinaryOperator::CreateAnd(V2,
+ Builder->getInt(C1->getValue()|C2->getValue()));
+ }
+ }
+ }
+
+ // Don't try to form a select if it's unlikely that we'll get rid of at
+ // least one of the operands. A select is generally more expensive than the
+ // 'or' that it is replacing.
+ if (Op0->hasOneUse() || Op1->hasOneUse()) {
+ // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
+ if (Value *V = matchSelectFromAndOr(A, C, B, D, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(A, C, D, B, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(C, A, B, D, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(C, A, D, B, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(B, D, A, C, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(B, D, C, A, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(D, B, A, C, *Builder))
+ return replaceInstUsesWith(I, V);
+ if (Value *V = matchSelectFromAndOr(D, B, C, A, *Builder))
+ return replaceInstUsesWith(I, V);
+ }
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Or(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Or(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
+ // ((A^B)&1)|(B&-2) -> (A&1) ^ B
+ if (match(A, m_Xor(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Xor(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A^B)&1) -> (A&1) ^ B
+ if (match(B, m_Xor(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Xor(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
+ }
+
+ // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
+ if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
+ if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
+ return BinaryOperator::CreateOr(Op0, C);
+
+ // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
+ if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
+ if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
+ if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
+ return BinaryOperator::CreateOr(Op1, C);
+
+ // ((B | C) & A) | B -> B | (A & C)
+ if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
+ return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
+
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ return DeMorgan;
+
+ // Canonicalize xor to the RHS.
+ bool SwappedForXor = false;
+ if (match(Op0, m_Xor(m_Value(), m_Value()))) {
+ std::swap(Op0, Op1);
+ SwappedForXor = true;
+ }
+
+ // A | ( A ^ B) -> A | B
+ // A | (~A ^ B) -> A | ~B
+ // (A & B) | (A ^ B)
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (Op0 == A || Op0 == B)
+ return BinaryOperator::CreateOr(A, B);
+
+ if (match(Op0, m_And(m_Specific(A), m_Specific(B))) ||
+ match(Op0, m_And(m_Specific(B), m_Specific(A))))
+ return BinaryOperator::CreateOr(A, B);
+
+ if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(B, B->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(A, A->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+
+ // A | ~(A | B) -> A | ~B
+ // A | ~(A ^ B) -> A | ~B
+ if (match(Op1, m_Not(m_Value(A))))
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
+ if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
+ Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
+ B->getOpcode() == Instruction::Xor)) {
+ Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
+ B->getOperand(0);
+ Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+
+ // (A & B) | ((~A) ^ B) -> (~A ^ B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
+
+ // ((~A) ^ B) | (A & B) -> (~A ^ B)
+ if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
+
+ if (SwappedForXor)
+ std::swap(Op0, Op1);
+
+ {
+ ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
+ ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
+ if (LHS && RHS)
+ if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
+ return replaceInstUsesWith(I, Res);
+
+ // TODO: Make this recursive; it's a little tricky because an arbitrary
+ // number of 'or' instructions might have to be created.
+ Value *X, *Y;
+ if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+ if (auto *Cmp = dyn_cast<ICmpInst>(X))
+ if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+ return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
+ if (auto *Cmp = dyn_cast<ICmpInst>(Y))
+ if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+ return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
+ }
+ if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+ if (auto *Cmp = dyn_cast<ICmpInst>(X))
+ if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+ return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
+ if (auto *Cmp = dyn_cast<ICmpInst>(Y))
+ if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+ return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
+ }
+ }
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return replaceInstUsesWith(I, Res);
+
+ if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
+ return CastedOr;
+
+ // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
+ if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
+ A->getType()->getScalarType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
+ A->getType()->getScalarType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
+ }
+
+ // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
+ // Since this OR statement hasn't been optimized further yet, we hope
+ // that this transformation will allow the new ORs to be optimized.
+ {
+ Value *X = nullptr, *Y = nullptr;
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
+ match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
+ Value *orTrue = Builder->CreateOr(A, C);
+ Value *orFalse = Builder->CreateOr(B, D);
+ return SelectInst::Create(X, orTrue, orFalse);
+ }
+ }
+
+ return Changed ? &I : nullptr;
+}
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyXorInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (Value *V = SimplifyBSwap(I))
+ return replaceInstUsesWith(I, V);
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ if (IsFreeToInvert(Op0I->getOperand(0),
+ Op0I->getOperand(0)->hasOneUse()) &&
+ IsFreeToInvert(Op0I->getOperand(1),
+ Op0I->getOperand(1)->hasOneUse())) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
+
+ } else if (Op0I->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ }
+ }
+ }
+
+ if (Constant *RHS = dyn_cast<Constant>(Op1)) {
+ if (RHS->isAllOnesValue() && Op0->hasOneUse())
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
+ return CmpInst::Create(CI->getOpcode(),
+ CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1));
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode, Builder->getTrue(),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ // ~(X-c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = Builder->getInt(RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue(),
+ 0, &I)) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ } else if (Op0I->getOpcode() == Instruction::LShr) {
+ // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+ // E1 = "X ^ C1"
+ BinaryOperator *E1;
+ ConstantInt *C1;
+ if (Op0I->hasOneUse() &&
+ (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
+ E1->getOpcode() == Instruction::Xor &&
+ (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
+ // fold (C1 >> C2) ^ C3
+ ConstantInt *C2 = Op0CI, *C3 = RHS;
+ APInt FoldConst = C1->getValue().lshr(C2->getValue());
+ FoldConst ^= C3->getValue();
+ // Prepare the two operands.
+ Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+ Opnd0->takeName(Op0I);
+ cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
+ Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
+
+ return BinaryOperator::CreateXor(Opnd0, FoldVal);
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1);
+ }
+ }
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | ~B) ^ (~A | B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1I, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (~A | B) ^ (A | ~B) -> A ^ B
+ if (match(Op0I, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1I, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A & ~B) ^ (~A & B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1I, m_And(m_Not(m_Specific(A)), m_Specific(B)))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (~A & B) ^ (A & ~B) -> A ^ B
+ if (match(Op0I, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1I, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A ^ C)^(A | B) -> ((~A) & B) ^ C
+ if (match(Op0I, m_Xor(m_Value(D), m_Value(C))) &&
+ match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (D == A)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(A), B), C);
+ if (D == B)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(B), A), C);
+ }
+ // (A | B)^(A ^ C) -> ((~A) & B) ^ C
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Xor(m_Value(D), m_Value(C)))) {
+ if (D == A)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(A), B), C);
+ if (D == B)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(B), A), C);
+ }
+ // (A & B) ^ (A ^ B) -> (A | B)
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Xor(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
+ // (A ^ B) ^ (A & B) -> (A | B)
+ if (match(Op0I, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
+ }
+
+ Value *A = nullptr, *B = nullptr;
+ // (A & ~B) ^ (~A) -> ~(A & B)
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Not(m_Specific(A))))
+ return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return replaceInstUsesWith(I,
+ getNewICmpValue(isSigned, Code, Op0, Op1,
+ Builder));
+ }
+ }
+
+ if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
+ return CastedXor;
+
+ return Changed ? &I : nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 000000000000..8acff91345d6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,3049 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+
+/// Return the specified type promoted as it would be to pass though a va_arg
+/// area.
+static Type *getPromotedType(Type *Ty) {
+ if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
+
+/// Given an aggregate type which ultimately holds a single scalar element,
+/// like {{{type}}} or [1 x type], return type.
+static Type *reduceToSingleValueType(Type *T) {
+ while (!T->isSingleValueType()) {
+ if (StructType *STy = dyn_cast<StructType>(T)) {
+ if (STy->getNumElements() == 1)
+ T = STy->getElementType(0);
+ else
+ break;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
+ if (ATy->getNumElements() == 1)
+ T = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ return T;
+}
+
+/// Return a constant boolean vector that has true elements in all positions
+/// where the input constant data vector has an element with the sign bit set.
+static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
+ SmallVector<Constant *, 32> BoolVec;
+ IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
+ for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
+ Constant *Elt = V->getElementAsConstant(I);
+ assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
+ "Unexpected constant data vector element type");
+ bool Sign = V->getElementType()->isIntegerTy()
+ ? cast<ConstantInt>(Elt)->isNegative()
+ : cast<ConstantFP>(Elt)->isNegative();
+ BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
+ }
+ return ConstantVector::get(BoolVec);
+}
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
+ return MI;
+ }
+
+ // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+ // load/store.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
+ if (!MemOpLength) return nullptr;
+
+ // Source and destination pointer types are always "i8*" for intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ uint64_t Size = MemOpLength->getLimitedValue();
+ assert(Size && "0-sized memory transferring should be removed already.");
+
+ if (Size > 8 || (Size&(Size-1)))
+ return nullptr; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ unsigned SrcAddrSp =
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
+ unsigned DstAddrSp =
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
+
+ IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+ Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+ // from double* to i8*. We'd much rather use a double load+store rather than
+ // an i64 load+store, here because this improves the odds that the source or
+ // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ MDNode *CopyMD = nullptr;
+ if (StrippedDest != MI->getArgOperand(0)) {
+ Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ ->getElementType();
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ SrcETy = reduceToSingleValueType(SrcETy);
+
+ if (SrcETy->isSingleValueType()) {
+ NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+ NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+
+ // If the memcpy has metadata describing the members, see if we can
+ // get the TBAA tag describing our copy.
+ if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
+ if (M->getNumOperands() == 3 && M->getOperand(0) &&
+ mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
+ mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ M->getOperand(1) &&
+ mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
+ mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
+ Size &&
+ M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
+ CopyMD = cast<MDNode>(M->getOperand(2));
+ }
+ }
+ }
+ }
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
+ L->setAlignment(SrcAlign);
+ if (CopyMD)
+ L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
+ StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
+ S->setAlignment(DstAlign);
+ if (CopyMD)
+ S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
+
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
+ return nullptr;
+ uint64_t Len = LenC->getLimitedValue();
+ Alignment = MI->getAlignment();
+ assert(Len && "0-sized memory setting should be removed already.");
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+ Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+ Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
+
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
+ MI->isVolatile());
+ S->setAlignment(Alignment);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return nullptr;
+}
+
+static Value *simplifyX86immShift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ LogicalShift = false; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ LogicalShift = true; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ LogicalShift = true; ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ // Simplify if count is constant.
+ auto Arg1 = II.getArgOperand(1);
+ auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
+ auto CDV = dyn_cast<ConstantDataVector>(Arg1);
+ auto CInt = dyn_cast<ConstantInt>(Arg1);
+ if (!CAZ && !CDV && !CInt)
+ return nullptr;
+
+ APInt Count(64, 0);
+ if (CDV) {
+ // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ auto VT = cast<VectorType>(CDV->getType());
+ unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
+ assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
+ unsigned NumSubElts = 64 / BitWidth;
+
+ // Concatenate the sub-elements to create the 64-bit value.
+ for (unsigned i = 0; i != NumSubElts; ++i) {
+ unsigned SubEltIdx = (NumSubElts - 1) - i;
+ auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
+ Count = Count.shl(BitWidth);
+ Count |= SubElt->getValue().zextOrTrunc(64);
+ }
+ }
+ else if (CInt)
+ Count = CInt->getValue();
+
+ auto Vec = II.getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ auto SVT = VT->getElementType();
+ unsigned VWidth = VT->getNumElements();
+ unsigned BitWidth = SVT->getPrimitiveSizeInBits();
+
+ // If shift-by-zero then just return the original value.
+ if (Count == 0)
+ return Vec;
+
+ // Handle cases when Shift >= BitWidth.
+ if (Count.uge(BitWidth)) {
+ // If LogicalShift - just return zero.
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+
+ // If ArithmeticShift - clamp Shift to (BitWidth - 1).
+ Count = APInt(64, BitWidth - 1);
+ }
+
+ // Get a constant vector of the same type as the first operand.
+ auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
+ auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
+
+// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
+// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
+// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
+static Value *simplifyX86varShift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ LogicalShift = false;
+ ShiftLeft = false;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ LogicalShift = true;
+ ShiftLeft = false;
+ break;
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ LogicalShift = true;
+ ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ // Simplify if all shift amounts are constant/undef.
+ auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!CShift)
+ return nullptr;
+
+ auto Vec = II.getArgOperand(0);
+ auto VT = cast<VectorType>(II.getType());
+ auto SVT = VT->getVectorElementType();
+ int NumElts = VT->getNumElements();
+ int BitWidth = SVT->getIntegerBitWidth();
+
+ // Collect each element's shift amount.
+ // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
+ bool AnyOutOfRange = false;
+ SmallVector<int, 8> ShiftAmts;
+ for (int I = 0; I < NumElts; ++I) {
+ auto *CElt = CShift->getAggregateElement(I);
+ if (CElt && isa<UndefValue>(CElt)) {
+ ShiftAmts.push_back(-1);
+ continue;
+ }
+
+ auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
+ if (!COp)
+ return nullptr;
+
+ // Handle out of range shifts.
+ // If LogicalShift - set to BitWidth (special case).
+ // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
+ APInt ShiftVal = COp->getValue();
+ if (ShiftVal.uge(BitWidth)) {
+ AnyOutOfRange = LogicalShift;
+ ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
+ continue;
+ }
+
+ ShiftAmts.push_back((int)ShiftVal.getZExtValue());
+ }
+
+ // If all elements out of range or UNDEF, return vector of zeros/undefs.
+ // ArithmeticShift should only hit this if they are all UNDEF.
+ auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
+ if (llvm::all_of(ShiftAmts, OutOfRange)) {
+ SmallVector<Constant *, 8> ConstantVec;
+ for (int Idx : ShiftAmts) {
+ if (Idx < 0) {
+ ConstantVec.push_back(UndefValue::get(SVT));
+ } else {
+ assert(LogicalShift && "Logical shift expected");
+ ConstantVec.push_back(ConstantInt::getNullValue(SVT));
+ }
+ }
+ return ConstantVector::get(ConstantVec);
+ }
+
+ // We can't handle only some out of range values with generic logical shifts.
+ if (AnyOutOfRange)
+ return nullptr;
+
+ // Build the shift amount constant vector.
+ SmallVector<Constant *, 8> ShiftVecAmts;
+ for (int Idx : ShiftAmts) {
+ if (Idx < 0)
+ ShiftVecAmts.push_back(UndefValue::get(SVT));
+ else
+ ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
+ }
+ auto ShiftVec = ConstantVector::get(ShiftVecAmts);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
+
+static Value *simplifyX86movmsk(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Arg = II.getArgOperand(0);
+ Type *ResTy = II.getType();
+ Type *ArgTy = Arg->getType();
+
+ // movmsk(undef) -> zero as we must ensure the upper bits are zero.
+ if (isa<UndefValue>(Arg))
+ return Constant::getNullValue(ResTy);
+
+ // We can't easily peek through x86_mmx types.
+ if (!ArgTy->isVectorTy())
+ return nullptr;
+
+ auto *C = dyn_cast<Constant>(Arg);
+ if (!C)
+ return nullptr;
+
+ // Extract signbits of the vector input and pack into integer result.
+ APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
+ for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
+ auto *COp = C->getAggregateElement(I);
+ if (!COp)
+ return nullptr;
+ if (isa<UndefValue>(COp))
+ continue;
+
+ auto *CInt = dyn_cast<ConstantInt>(COp);
+ auto *CFp = dyn_cast<ConstantFP>(COp);
+ if (!CInt && !CFp)
+ return nullptr;
+
+ if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
+ Result.setBit(I);
+ }
+
+ return Constant::getIntegerValue(ResTy, Result);
+}
+
+static Value *simplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
+ if (!CInt)
+ return nullptr;
+
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32-bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+}
+
+/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
+/// or conversion to a shuffle vector.
+static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
+ ConstantInt *CILength, ConstantInt *CIIndex,
+ InstCombiner::BuilderTy &Builder) {
+ auto LowConstantHighUndef = [&](uint64_t Val) {
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ };
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ ConstantInt *CI0 =
+ C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Attempt to constant fold.
+ if (CILength && CIIndex) {
+ // From AMD documentation: "The bit index and field length are each six
+ // bits in length other bits of the field are ignored."
+ APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
+ APInt APLength = CILength->getValue().zextOrTrunc(6);
+
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize EXTRQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ VectorType *ShufTy = VectorType::get(IntTy8, 16);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
+ for (int i = Length; i != 8; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(UndefValue::get(IntTy32));
+
+ Value *SV = Builder.CreateShuffleVector(
+ Builder.CreateBitCast(Op0, ShufTy),
+ ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // Constant Fold - shift Index'th bit to lowest position and mask off
+ // Length bits.
+ if (CI0) {
+ APInt Elt = CI0->getValue();
+ Elt = Elt.lshr(Index).zextOrTrunc(Length);
+ return LowConstantHighUndef(Elt.getZExtValue());
+ }
+
+ // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
+ Value *Args[] = {Op0, CILength, CIIndex};
+ Module *M = II.getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+ return Builder.CreateCall(F, Args);
+ }
+ }
+
+ // Constant Fold - extraction from zero is always {zero, undef}.
+ if (CI0 && CI0->equalsInt(0))
+ return LowConstantHighUndef(0);
+
+ return nullptr;
+}
+
+/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
+/// folding or conversion to a shuffle vector.
+static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
+ APInt APLength, APInt APIndex,
+ InstCombiner::BuilderTy &Builder) {
+
+ // From AMD documentation: "The bit index and field length are each six bits
+ // in length other bits of the field are ignored."
+ APIndex = APIndex.zextOrTrunc(6);
+ APLength = APLength.zextOrTrunc(6);
+
+ // Attempt to constant fold.
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize INSERTQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ VectorType *ShufTy = VectorType::get(IntTy8, 16);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (int i = 0; i != (int)Index; ++i)
+ ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
+ for (int i = Index + Length; i != 8; ++i)
+ ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(UndefValue::get(IntTy32));
+
+ Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
+ Builder.CreateBitCast(Op1, ShufTy),
+ ConstantVector::get(ShuffleMask));
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI00 =
+ C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CI10 =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Constant Fold - insert bottom Length bits starting at the Index'th bit.
+ if (CI00 && CI10) {
+ APInt V00 = CI00->getValue();
+ APInt V10 = CI10->getValue();
+ APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
+ V00 = V00 & ~Mask;
+ V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
+ APInt Val = V00 | V10;
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ }
+
+ // If we were an INSERTQ call, we'll save demanded elements if we convert to
+ // INSERTQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Constant *CILength = ConstantInt::get(IntTy8, Length, false);
+ Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
+
+ Value *Args[] = {Op0, Op1, CILength, CIIndex};
+ Module *M = II.getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ return Builder.CreateCall(F, Args);
+ }
+
+ return nullptr;
+}
+
+/// Attempt to convert pshufb* to shufflevector if the mask is constant.
+static Value *simplifyX86pshufb(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!V)
+ return nullptr;
+
+ auto *VecTy = cast<VectorType>(II.getType());
+ auto *MaskEltTy = Type::getInt32Ty(II.getContext());
+ unsigned NumElts = VecTy->getNumElements();
+ assert((NumElts == 16 || NumElts == 32) &&
+ "Unexpected number of elements in shuffle mask!");
+
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ Constant *Indexes[32] = {NULL};
+
+ // Each byte in the shuffle control mask forms an index to permute the
+ // corresponding byte in the destination operand.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *COp = V->getAggregateElement(I);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return nullptr;
+
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = UndefValue::get(MaskEltTy);
+ continue;
+ }
+
+ int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
+
+ // If the most significant bit (bit[7]) of each byte of the shuffle
+ // control mask is set, then zero is written in the result byte.
+ // The zero vector is in the right-hand side of the resulting
+ // shufflevector.
+
+ // The value of each index for the high 128-bit lane is the least
+ // significant 4 bits of the respective shuffle control byte.
+ Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
+ Indexes[I] = ConstantInt::get(MaskEltTy, Index);
+ }
+
+ auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
+ auto V1 = II.getArgOperand(0);
+ auto V2 = Constant::getNullValue(VecTy);
+ return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
+}
+
+/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
+static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!V)
+ return nullptr;
+
+ auto *MaskEltTy = Type::getInt32Ty(II.getContext());
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ assert(NumElts == 8 || NumElts == 4 || NumElts == 2);
+
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ Constant *Indexes[8] = {NULL};
+
+ // The intrinsics only read one or two bits, clear the rest.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *COp = V->getAggregateElement(I);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return nullptr;
+
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = UndefValue::get(MaskEltTy);
+ continue;
+ }
+
+ APInt Index = cast<ConstantInt>(COp)->getValue();
+ Index = Index.zextOrTrunc(32).getLoBits(2);
+
+ // The PD variants uses bit 1 to select per-lane element index, so
+ // shift down to convert to generic shuffle mask index.
+ if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
+ II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
+ Index = Index.lshr(1);
+
+ // The _256 variants are a bit trickier since the mask bits always index
+ // into the corresponding 128 half. In order to convert to a generic
+ // shuffle, we have to make that explicit.
+ if ((II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
+ II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) &&
+ ((NumElts / 2) <= I)) {
+ Index += APInt(32, NumElts / 2);
+ }
+
+ Indexes[I] = ConstantInt::get(MaskEltTy, Index);
+ }
+
+ auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
+ auto V1 = II.getArgOperand(0);
+ auto V2 = UndefValue::get(V1->getType());
+ return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
+}
+
+/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
+static Value *simplifyX86vpermv(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ auto *V = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!V)
+ return nullptr;
+
+ auto *VecTy = cast<VectorType>(II.getType());
+ auto *MaskEltTy = Type::getInt32Ty(II.getContext());
+ unsigned Size = VecTy->getNumElements();
+ assert(Size == 8 && "Unexpected shuffle mask size");
+
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ Constant *Indexes[8] = {NULL};
+
+ for (unsigned I = 0; I < Size; ++I) {
+ Constant *COp = V->getAggregateElement(I);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return nullptr;
+
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = UndefValue::get(MaskEltTy);
+ continue;
+ }
+
+ APInt Index = cast<ConstantInt>(COp)->getValue();
+ Index = Index.zextOrTrunc(32).getLoBits(3);
+ Indexes[I] = ConstantInt::get(MaskEltTy, Index);
+ }
+
+ auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
+ auto V1 = II.getArgOperand(0);
+ auto V2 = UndefValue::get(VecTy);
+ return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
+}
+
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *simplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
+ if (!CInt)
+ return nullptr;
+
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<uint32_t, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+}
+
+/// Decode XOP integer vector comparison intrinsics.
+static Value *simplifyX86vpcom(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder,
+ bool IsSigned) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ uint64_t Imm = CInt->getZExtValue() & 0x7;
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+
+ switch (Imm) {
+ case 0x0:
+ Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ break;
+ case 0x1:
+ Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ break;
+ case 0x2:
+ Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ break;
+ case 0x3:
+ Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ break;
+ case 0x4:
+ Pred = ICmpInst::ICMP_EQ; break;
+ case 0x5:
+ Pred = ICmpInst::ICMP_NE; break;
+ case 0x6:
+ return ConstantInt::getSigned(VecTy, 0); // FALSE
+ case 0x7:
+ return ConstantInt::getSigned(VecTy, -1); // TRUE
+ }
+
+ if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
+ II.getArgOperand(1)))
+ return Builder.CreateSExtOrTrunc(Cmp, VecTy);
+ }
+ return nullptr;
+}
+
+static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+
+ // fmin(x, x) -> x
+ if (Arg0 == Arg1)
+ return Arg0;
+
+ const auto *C1 = dyn_cast<ConstantFP>(Arg1);
+
+ // fmin(x, nan) -> x
+ if (C1 && C1->isNaN())
+ return Arg0;
+
+ // This is the value because if undef were NaN, we would return the other
+ // value and cannot return a NaN unless both operands are.
+ //
+ // fmin(undef, x) -> x
+ if (isa<UndefValue>(Arg0))
+ return Arg1;
+
+ // fmin(x, undef) -> x
+ if (isa<UndefValue>(Arg1))
+ return Arg0;
+
+ Value *X = nullptr;
+ Value *Y = nullptr;
+ if (II.getIntrinsicID() == Intrinsic::minnum) {
+ // fmin(x, fmin(x, y)) -> fmin(x, y)
+ // fmin(y, fmin(x, y)) -> fmin(x, y)
+ if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
+ if (Arg0 == X || Arg0 == Y)
+ return Arg1;
+ }
+
+ // fmin(fmin(x, y), x) -> fmin(x, y)
+ // fmin(fmin(x, y), y) -> fmin(x, y)
+ if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
+ if (Arg1 == X || Arg1 == Y)
+ return Arg0;
+ }
+
+ // TODO: fmin(nnan x, inf) -> x
+ // TODO: fmin(nnan ninf x, flt_max) -> x
+ if (C1 && C1->isInfinity()) {
+ // fmin(x, -inf) -> -inf
+ if (C1->isNegative())
+ return Arg1;
+ }
+ } else {
+ assert(II.getIntrinsicID() == Intrinsic::maxnum);
+ // fmax(x, fmax(x, y)) -> fmax(x, y)
+ // fmax(y, fmax(x, y)) -> fmax(x, y)
+ if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
+ if (Arg0 == X || Arg0 == Y)
+ return Arg1;
+ }
+
+ // fmax(fmax(x, y), x) -> fmax(x, y)
+ // fmax(fmax(x, y), y) -> fmax(x, y)
+ if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
+ if (Arg1 == X || Arg1 == Y)
+ return Arg0;
+ }
+
+ // TODO: fmax(nnan x, -inf) -> x
+ // TODO: fmax(nnan ninf x, -flt_max) -> x
+ if (C1 && C1->isInfinity()) {
+ // fmax(x, inf) -> inf
+ if (!C1->isNegative())
+ return Arg1;
+ }
+ }
+ return nullptr;
+}
+
+static bool maskIsAllOneOrUndef(Value *Mask) {
+ auto *ConstMask = dyn_cast<Constant>(Mask);
+ if (!ConstMask)
+ return false;
+ if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
+ return true;
+ for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ if (auto *MaskElt = ConstMask->getAggregateElement(I))
+ if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+static Value *simplifyMaskedLoad(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ // If the mask is all ones or undefs, this is a plain vector load of the 1st
+ // argument.
+ if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+ return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
+ }
+
+ return nullptr;
+}
+
+static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+ if (!ConstMask)
+ return nullptr;
+
+ // If the mask is all zeros, this instruction does nothing.
+ if (ConstMask->isNullValue())
+ return IC.eraseInstFromFunction(II);
+
+ // If the mask is all ones, this is a plain vector store of the 1st argument.
+ if (ConstMask->isAllOnesValue()) {
+ Value *StorePtr = II.getArgOperand(1);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
+ return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
+ }
+
+ return nullptr;
+}
+
+static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
+ // If the mask is all zeros, return the "passthru" argument of the gather.
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
+ if (ConstMask && ConstMask->isNullValue())
+ return IC.replaceInstUsesWith(II, II.getArgOperand(3));
+
+ return nullptr;
+}
+
+static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
+ // If the mask is all zeros, a scatter does nothing.
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+ if (ConstMask && ConstMask->isNullValue())
+ return IC.eraseInstFromFunction(II);
+
+ return nullptr;
+}
+
+// TODO: If the x86 backend knew how to convert a bool vector mask back to an
+// XMM register mask efficiently, we could transform all x86 masked intrinsics
+// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
+static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
+ Value *Ptr = II.getOperand(0);
+ Value *Mask = II.getOperand(1);
+ Constant *ZeroVec = Constant::getNullValue(II.getType());
+
+ // Special case a zero mask since that's not a ConstantDataVector.
+ // This masked load instruction creates a zero vector.
+ if (isa<ConstantAggregateZero>(Mask))
+ return IC.replaceInstUsesWith(II, ZeroVec);
+
+ auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
+ if (!ConstMask)
+ return nullptr;
+
+ // The mask is constant. Convert this x86 intrinsic to the LLVM instrinsic
+ // to allow target-independent optimizations.
+
+ // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
+ // the LLVM intrinsic definition for the pointer argument.
+ unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
+ Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");
+
+ // Second, convert the x86 XMM integer vector mask to a vector of bools based
+ // on each element's most significant bit (the sign bit).
+ Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
+
+ // The pass-through vector for an x86 masked load is a zero vector.
+ CallInst *NewMaskedLoad =
+ IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
+ return IC.replaceInstUsesWith(II, NewMaskedLoad);
+}
+
+// TODO: If the x86 backend knew how to convert a bool vector mask back to an
+// XMM register mask efficiently, we could transform all x86 masked intrinsics
+// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
+static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
+ Value *Ptr = II.getOperand(0);
+ Value *Mask = II.getOperand(1);
+ Value *Vec = II.getOperand(2);
+
+ // Special case a zero mask since that's not a ConstantDataVector:
+ // this masked store instruction does nothing.
+ if (isa<ConstantAggregateZero>(Mask)) {
+ IC.eraseInstFromFunction(II);
+ return true;
+ }
+
+ // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
+ // anything else at this level.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
+ return false;
+
+ auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
+ if (!ConstMask)
+ return false;
+
+ // The mask is constant. Convert this x86 intrinsic to the LLVM instrinsic
+ // to allow target-independent optimizations.
+
+ // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
+ // the LLVM intrinsic definition for the pointer argument.
+ unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
+ Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");
+
+ // Second, convert the x86 XMM integer vector mask to a vector of bools based
+ // on each element's most significant bit (the sign bit).
+ Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
+
+ IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
+
+ // 'Replace uses' doesn't work for stores. Erase the original masked store.
+ IC.eraseInstFromFunction(II);
+ return true;
+}
+
+// Returns true iff the 2 intrinsics have the same operands, limiting the
+// comparison to the first NumOperands.
+static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
+ unsigned NumOperands) {
+ assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
+ assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
+ for (unsigned i = 0; i < NumOperands; i++)
+ if (I.getArgOperand(i) != E.getArgOperand(i))
+ return false;
+ return true;
+}
+
+// Remove trivially empty start/end intrinsic ranges, i.e. a start
+// immediately followed by an end (ignoring debuginfo or other
+// start/end intrinsics in between). As this handles only the most trivial
+// cases, tracking the nesting level is not needed:
+//
+// call @llvm.foo.start(i1 0) ; &I
+// call @llvm.foo.start(i1 0)
+// call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
+// call @llvm.foo.end(i1 0)
+static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
+ unsigned EndID, InstCombiner &IC) {
+ assert(I.getIntrinsicID() == StartID &&
+ "Start intrinsic does not have expected ID");
+ BasicBlock::iterator BI(I), BE(I.getParent()->end());
+ for (++BI; BI != BE; ++BI) {
+ if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
+ if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
+ continue;
+ if (E->getIntrinsicID() == EndID &&
+ haveSameOperands(I, *E, E->getNumArgOperands())) {
+ IC.eraseInstFromFunction(*E);
+ IC.eraseInstFromFunction(I);
+ return true;
+ }
+ }
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
+ removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
+ removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
+ return nullptr;
+}
+
+/// CallInst simplification. This mostly only handles folding of intrinsic
+/// instructions. For normal calls, it allows visitCallSite to do the heavy
+/// lifting.
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ auto Args = CI.arg_operands();
+ if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
+ TLI, DT, AC))
+ return replaceInstUsesWith(CI, V);
+
+ if (isFreeCall(&CI, TLI))
+ return visitFree(CI);
+
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue())
+ return eraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient.
+ }
+ }
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return nullptr;
+
+ // If we have a memmove and the source operation is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getModule();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+ Changed = true;
+ }
+ }
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ // memmove(x,x,size) -> noop.
+ if (MTI->getSource() == MTI->getDest())
+ return eraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
+ unsigned DemandedWidth) {
+ APInt UndefElts(Width, 0);
+ APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
+ return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
+ };
+ auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width,
+ unsigned DemandedWidth) {
+ APInt UndefElts(Width, 0);
+ APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth);
+ return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
+ };
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ uint64_t Size;
+ if (getObjectSize(II->getArgOperand(0), Size, DL, TLI)) {
+ APInt APSize(II->getType()->getIntegerBitWidth(), Size);
+ // Equality check to be sure that `Size` can fit in a value of type
+ // `II->getType()`
+ if (APSize == Size)
+ return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), APSize));
+ }
+ return nullptr;
+ }
+ case Intrinsic::bswap: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = nullptr;
+
+ // bswap(bswap(x)) -> x
+ if (match(IIOperand, m_BSwap(m_Value(X))))
+ return replaceInstUsesWith(CI, X);
+
+ // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
+ if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
+ unsigned C = X->getType()->getPrimitiveSizeInBits() -
+ IIOperand->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(X->getType(), C);
+ Value *V = Builder->CreateLShr(X, CV);
+ return new TruncInst(V, IIOperand->getType());
+ }
+ break;
+ }
+
+ case Intrinsic::bitreverse: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = nullptr;
+
+ // bitreverse(bitreverse(x)) -> x
+ if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
+ return replaceInstUsesWith(CI, X);
+ break;
+ }
+
+ case Intrinsic::masked_load:
+ if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder))
+ return replaceInstUsesWith(CI, SimplifiedMaskedOp);
+ break;
+ case Intrinsic::masked_store:
+ return simplifyMaskedStore(*II, *this);
+ case Intrinsic::masked_gather:
+ return simplifyMaskedGather(*II, *this);
+ case Intrinsic::masked_scatter:
+ return simplifyMaskedScatter(*II, *this);
+
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return replaceInstUsesWith(CI, II->getArgOperand(0));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getArgOperand(0));
+ }
+ break;
+ case Intrinsic::cttz: {
+ // If all bits below the first known one are known zero,
+ // this value is constant.
+ IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
+ unsigned TrailingZeros = KnownOne.countTrailingZeros();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return replaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, TrailingZeros)));
+
+ }
+ break;
+ case Intrinsic::ctlz: {
+ // If all bits above the first known one are known zero,
+ // this value is constant.
+ IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
+ unsigned LeadingZeros = KnownOne.countLeadingZeros();
+ APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return replaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, LeadingZeros)));
+
+ }
+ break;
+
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+ // fall through
+
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow: {
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
+
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
+
+ break;
+ }
+
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum: {
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ // Canonicalize constants to the RHS.
+ if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
+ II->setArgOperand(0, Arg1);
+ II->setArgOperand(1, Arg0);
+ return II;
+ }
+ if (Value *V = simplifyMinnumMaxnum(*II))
+ return replaceInstUsesWith(*II, V);
+ break;
+ }
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x: {
+ // Turn PPC VSX loads into normal loads.
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr, Twine(""), false, 1);
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x: {
+ // Turn PPC VSX stores into normal stores.
+ Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
+ }
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getType()->getVectorNumElements());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = Builder->CreateLoad(Ptr);
+ return new FPExtInst(Load, II->getType());
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
+ 32) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getArgOperand(0)->getType()->getVectorNumElements());
+ Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
+ 32) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_vcvtph2ps_128:
+ case Intrinsic::x86_vcvtph2ps_256: {
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ auto RetType = cast<VectorType>(II->getType());
+ unsigned ArgWidth = ArgType->getNumElements();
+ unsigned RetWidth = RetType->getNumElements();
+ assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
+ assert(ArgType->isIntOrIntVectorTy() &&
+ ArgType->getScalarSizeInBits() == 16 &&
+ "CVTPH2PS input type should be 16-bit integer vector");
+ assert(RetType->getScalarType()->isFloatTy() &&
+ "CVTPH2PS output type should be 32-bit float vector");
+
+ // Constant folding: Convert to generic half to single conversion.
+ if (isa<ConstantAggregateZero>(Arg))
+ return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
+
+ if (isa<ConstantDataVector>(Arg)) {
+ auto VectorHalfAsShorts = Arg;
+ if (RetWidth < ArgWidth) {
+ SmallVector<uint32_t, 8> SubVecMask;
+ for (unsigned i = 0; i != RetWidth; ++i)
+ SubVecMask.push_back((int)i);
+ VectorHalfAsShorts = Builder->CreateShuffleVector(
+ Arg, UndefValue::get(ArgType), SubVecMask);
+ }
+
+ auto VectorHalfType =
+ VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
+ auto VectorHalfs =
+ Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
+ auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
+ return replaceInstUsesWith(*II, VectorFloats);
+ }
+
+ // We only use the lowest lanes of the argument.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ Value *Arg = II->getArgOperand(0);
+ unsigned VWidth = Arg->getType()->getVectorNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ if (Value *V = simplifyX86movmsk(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+ }
+
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ bool MadeChange = false;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ unsigned VWidth = Arg0->getType()->getVectorNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
+ II->setArgOperand(1, V);
+ MadeChange = true;
+ }
+ if (MadeChange)
+ return II;
+ break;
+ }
+
+ case Intrinsic::x86_sse_add_ss:
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse2_add_sd:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_div_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse2_cmp_sd: {
+ // These intrinsics only demand the lowest element of the second input
+ // vector.
+ Value *Arg1 = II->getArgOperand(1);
+ unsigned VWidth = Arg1->getType()->getVectorNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
+ II->setArgOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd: {
+ // These intrinsics demand the upper elements of the first input vector and
+ // the lowest element of the second input vector.
+ bool MadeChange = false;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ unsigned VWidth = Arg0->getType()->getVectorNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) {
+ II->setArgOperand(0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
+ II->setArgOperand(1, V);
+ MadeChange = true;
+ }
+ if (MadeChange)
+ return II;
+ break;
+ }
+
+ // Constant fold ashr( <A x Bi>, Ci ).
+ // Constant fold lshr( <A x Bi>, Ci ).
+ // Constant fold shl( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ if (Value *V = simplifyX86immShift(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w: {
+ if (Value *V = simplifyX86immShift(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+
+ // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ Value *Arg1 = II->getArgOperand(1);
+ assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
+ "Unexpected packed shift size");
+ unsigned VWidth = Arg1->getType()->getVectorNumElements();
+
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
+ II->setArgOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ if (Value *V = simplifyX86varShift(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = simplifyX86insertps(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse4a_extrq: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+ unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 16 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CILength =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CIIndex =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
+ if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
+ return replaceInstUsesWith(*II, V);
+
+ // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
+ // operands and the lowest 16-bits of the second.
+ bool MadeChange = false;
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ II->setArgOperand(0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
+ II->setArgOperand(1, V);
+ MadeChange = true;
+ }
+ if (MadeChange)
+ return II;
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_extrqi: {
+ // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
+ // bits of the lower 64-bits. The upper 64-bits are undefined.
+ Value *Op0 = II->getArgOperand(0);
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
+ return replaceInstUsesWith(*II, V);
+
+ // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_insertq: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ Op1->getType()->getVectorNumElements() == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI11 =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
+ if (CI11) {
+ const APInt &V11 = CI11->getValue();
+ APInt Len = V11.zextOrTrunc(6);
+ APInt Idx = V11.lshr(8).zextOrTrunc(6);
+ if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ return replaceInstUsesWith(*II, V);
+ }
+
+ // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_insertqi: {
+ // INSERTQI: Extract lowest Length bits from lower half of second source and
+ // insert over first source starting at Index bit. The upper 64-bits are
+ // undefined.
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+ unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 2 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (CILength && CIIndex) {
+ APInt Len = CILength->getValue().zextOrTrunc(6);
+ APInt Idx = CIIndex->getValue().zextOrTrunc(6);
+ if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ return replaceInstUsesWith(*II, V);
+ }
+
+ // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
+ // operands.
+ bool MadeChange = false;
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ II->setArgOperand(0, V);
+ MadeChange = true;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
+ II->setArgOperand(1, V);
+ MadeChange = true;
+ }
+ if (MadeChange)
+ return II;
+ break;
+ }
+
+ case Intrinsic::x86_sse41_pblendvb:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx2_pblendvb: {
+ // Convert blendv* to vector selects if the mask is constant.
+ // This optimization is convoluted because the intrinsic is defined as
+ // getting a vector of floats or doubles for the ps and pd versions.
+ // FIXME: That should be changed.
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ Value *Mask = II->getArgOperand(2);
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return replaceInstUsesWith(CI, Op0);
+
+ // Zero Mask - select 1st argument.
+ if (isa<ConstantAggregateZero>(Mask))
+ return replaceInstUsesWith(CI, Op0);
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
+ if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
+ Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
+ }
+ break;
+ }
+
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ if (Value *V = simplifyX86pshufb(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256:
+ if (Value *V = simplifyX86vpermilvar(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ if (Value *V = simplifyX86vpermv(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = simplifyX86vperm2(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_avx_maskload_ps:
+ case Intrinsic::x86_avx_maskload_pd:
+ case Intrinsic::x86_avx_maskload_ps_256:
+ case Intrinsic::x86_avx_maskload_pd_256:
+ case Intrinsic::x86_avx2_maskload_d:
+ case Intrinsic::x86_avx2_maskload_q:
+ case Intrinsic::x86_avx2_maskload_d_256:
+ case Intrinsic::x86_avx2_maskload_q_256:
+ if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
+ return I;
+ break;
+
+ case Intrinsic::x86_sse2_maskmov_dqu:
+ case Intrinsic::x86_avx_maskstore_ps:
+ case Intrinsic::x86_avx_maskstore_pd:
+ case Intrinsic::x86_avx_maskstore_ps_256:
+ case Intrinsic::x86_avx_maskstore_pd_256:
+ case Intrinsic::x86_avx2_maskstore_d:
+ case Intrinsic::x86_avx2_maskstore_q:
+ case Intrinsic::x86_avx2_maskstore_d_256:
+ case Intrinsic::x86_avx2_maskstore_q_256:
+ if (simplifyX86MaskedStore(*II, *this))
+ return nullptr;
+ break;
+
+ case Intrinsic::x86_xop_vpcomb:
+ case Intrinsic::x86_xop_vpcomd:
+ case Intrinsic::x86_xop_vpcomq:
+ case Intrinsic::x86_xop_vpcomw:
+ if (Value *V = simplifyX86vpcom(*II, *Builder, true))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_xop_vpcomub:
+ case Intrinsic::x86_xop_vpcomud:
+ case Intrinsic::x86_xop_vpcomuq:
+ case Intrinsic::x86_xop_vpcomuw:
+ if (Value *V = simplifyX86vpcom(*II, *Builder, false))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ // Note that ppc_altivec_vperm has a big-endian bias, so when creating
+ // a vectorshuffle for little endian, we must undo the transformation
+ // performed on vec_perm in altivec.h. That is, we must complement
+ // the permutation mask with respect to 31 and reverse the order of
+ // V1 and V2.
+ if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
+ assert(Mask->getType()->getVectorNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ continue;
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+ if (DL.isLittleEndian())
+ Idx = 31 - Idx;
+
+ if (!ExtractedElts[Idx]) {
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
+ Builder->getInt32(Idx&15));
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ Builder->getInt32(i));
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu:
+ case Intrinsic::aarch64_neon_smull:
+ case Intrinsic::aarch64_neon_umull: {
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+
+ // Handle mul by zero first:
+ if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
+ return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
+ }
+
+ // Check for constant LHS & RHS - in this case we just simplify.
+ bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
+ II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
+ VectorType *NewVT = cast<VectorType>(II->getType());
+ if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
+ if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
+ CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
+ CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
+
+ return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
+ }
+
+ // Couldn't simplify - canonicalize constant to the RHS.
+ std::swap(Arg0, Arg1);
+ }
+
+ // Handle mul by one:
+ if (Constant *CV1 = dyn_cast<Constant>(Arg1))
+ if (ConstantInt *Splat =
+ dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
+ if (Splat->isOne())
+ return CastInst::CreateIntegerCast(Arg0, II->getType(),
+ /*isSigned=*/!Zext);
+
+ break;
+ }
+
+ case Intrinsic::amdgcn_rcp: {
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
+ const APFloat &ArgVal = C->getValueAPF();
+ APFloat Val(ArgVal.getSemantics(), 1.0);
+ APFloat::opStatus Status = Val.divide(ArgVal,
+ APFloat::rmNearestTiesToEven);
+ // Only do this if it was exact and therefore not dependent on the
+ // rounding mode.
+ if (Status == APFloat::opOK)
+ return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_frexp_exp: {
+ Value *Src = II->getArgOperand(0);
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
+ int Exp;
+ APFloat Significand = frexp(C->getValueAPF(), Exp,
+ APFloat::rmNearestTiesToEven);
+
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
+ return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
+ Significand));
+ }
+
+ // Match instruction special case behavior.
+ if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
+ Exp = 0;
+
+ return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
+ }
+
+ if (isa<UndefValue>(Src))
+ return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ break;
+ }
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ if (&*++SS->getIterator() == II)
+ return eraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI(II);
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return eraseInstFromFunction(CI);
+
+ // Bail if we cross over an intrinsic with side effects, such as
+ // llvm.stacksave, llvm.read_register, or llvm.setjmp.
+ if (II->mayHaveSideEffects()) {
+ CannotRemove = true;
+ break;
+ }
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return, resume, or unwind block and if there
+ // are no allocas or calls between the restore and the return, nuke the
+ // restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
+ return eraseInstFromFunction(CI);
+ break;
+ }
+ case Intrinsic::lifetime_start:
+ if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
+ Intrinsic::lifetime_end, *this))
+ return nullptr;
+ break;
+ case Intrinsic::assume: {
+ Value *IIOperand = II->getArgOperand(0);
+ // Remove an assume if it is immediately followed by an identical assume.
+ if (match(II->getNextNode(),
+ m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
+ return eraseInstFromFunction(CI);
+
+ // Canonicalize assume(a && b) -> assume(a); assume(b);
+ // Note: New assumption intrinsics created here are registered by
+ // the InstCombineIRInserter object.
+ Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
+ if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
+ Builder->CreateCall(AssumeIntrinsic, A, II->getName());
+ Builder->CreateCall(AssumeIntrinsic, B, II->getName());
+ return eraseInstFromFunction(*II);
+ }
+ // assume(!(a || b)) -> assume(!a); assume(!b);
+ if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
+ Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
+ II->getName());
+ Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
+ II->getName());
+ return eraseInstFromFunction(*II);
+ }
+
+ // assume( (load addr) != null ) -> add 'nonnull' metadata to load
+ // (if assume is valid at the load)
+ if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
+ Value *LHS = ICmp->getOperand(0);
+ Value *RHS = ICmp->getOperand(1);
+ if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
+ isa<LoadInst>(LHS) &&
+ isa<Constant>(RHS) &&
+ RHS->getType()->isPointerTy() &&
+ cast<Constant>(RHS)->isNullValue()) {
+ LoadInst* LI = cast<LoadInst>(LHS);
+ if (isValidAssumeForContext(II, LI, DT)) {
+ MDNode *MD = MDNode::get(II->getContext(), None);
+ LI->setMetadata(LLVMContext::MD_nonnull, MD);
+ return eraseInstFromFunction(*II);
+ }
+ }
+ // TODO: apply nonnull return attributes to calls and invokes
+ // TODO: apply range metadata for range check patterns?
+ }
+ // If there is a dominating assume with the same condition as this one,
+ // then this one is redundant, and should be removed.
+ APInt KnownZero(1, 0), KnownOne(1, 0);
+ computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
+ if (KnownOne.isAllOnesValue())
+ return eraseInstFromFunction(*II);
+
+ break;
+ }
+ case Intrinsic::experimental_gc_relocate: {
+ // Translate facts known about a pointer before relocating into
+ // facts about the relocate value, while being careful to
+ // preserve relocation semantics.
+ Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
+
+ // Remove the relocation if unused, note that this check is required
+ // to prevent the cases below from looping forever.
+ if (II->use_empty())
+ return eraseInstFromFunction(*II);
+
+ // Undef is undef, even after relocation.
+ // TODO: provide a hook for this in GCStrategy. This is clearly legal for
+ // most practical collectors, but there was discussion in the review thread
+ // about whether it was legal for all possible collectors.
+ if (isa<UndefValue>(DerivedPtr))
+ // Use undef of gc_relocate's type to replace it.
+ return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+
+ if (auto *PT = dyn_cast<PointerType>(II->getType())) {
+ // The relocation of null will be null for most any collector.
+ // TODO: provide a hook for this in GCStrategy. There might be some
+ // weird collector this property does not hold for.
+ if (isa<ConstantPointerNull>(DerivedPtr))
+ // Use null-pointer of gc_relocate's type to replace it.
+ return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
+
+ // isKnownNonNull -> nonnull attribute
+ if (isKnownNonNullAt(DerivedPtr, II, DT))
+ II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ }
+
+ // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
+ // Canonicalize on the type from the uses to the defs
+
+ // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
+ break;
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// If this cast does not affect the value passed through the varargs area, we
+/// can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const DataLayout &DL,
+ const CastInst *const CI,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // If this is a GC intrinsic, avoid munging types. We need types for
+ // statepoint reconstruction in SelectionDAG.
+ // TODO: This is probably something which should be expanded to all
+ // intrinsics since the entire point of intrinsics is that
+ // they are understandable by the optimizer.
+ if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
+ return false;
+
+ // The size of ByVal or InAlloca arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.isByValOrInAllocaArgument(ix))
+ return true;
+
+ Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
+
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
+ if (!CI->getCalledFunction()) return nullptr;
+
+ auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+ replaceInstUsesWith(*From, With);
+ };
+ LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ if (Value *With = Simplifier.optimizeCall(CI)) {
+ ++NumSimplified;
+ return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
+ }
+
+ return nullptr;
+}
+
+static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
+ // Strip off at most one level of pointer casts, looking for an alloca. This
+ // is good enough in practice and simpler than handling any number of casts.
+ Value *Underlying = TrampMem->stripPointerCasts();
+ if (Underlying != TrampMem &&
+ (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
+ return nullptr;
+ if (!isa<AllocaInst>(Underlying))
+ return nullptr;
+
+ IntrinsicInst *InitTrampoline = nullptr;
+ for (User *U : TrampMem->users()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ if (!II)
+ return nullptr;
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
+ if (InitTrampoline)
+ // More than one init_trampoline writes to this value. Give up.
+ return nullptr;
+ InitTrampoline = II;
+ continue;
+ }
+ if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
+ // Allow any number of calls to adjust.trampoline.
+ continue;
+ return nullptr;
+ }
+
+ // No call to init.trampoline found.
+ if (!InitTrampoline)
+ return nullptr;
+
+ // Check that the alloca is being used in the expected way.
+ if (InitTrampoline->getOperand(0) != TrampMem)
+ return nullptr;
+
+ return InitTrampoline;
+}
+
+static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
+ Value *TrampMem) {
+ // Visit all the previous instructions in the basic block, and try to find a
+ // init.trampoline which has a direct path to the adjust.trampoline.
+ for (BasicBlock::iterator I = AdjustTramp->getIterator(),
+ E = AdjustTramp->getParent()->begin();
+ I != E;) {
+ Instruction *Inst = &*--I;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
+ II->getOperand(0) == TrampMem)
+ return II;
+ if (Inst->mayWriteToMemory())
+ return nullptr;
+ }
+ return nullptr;
+}
+
+// Given a call to llvm.adjust.trampoline, find and return the corresponding
+// call to llvm.init.trampoline if the call to the trampoline can be optimized
+// to a direct call to a function. Otherwise return NULL.
+//
+static IntrinsicInst *findInitTrampoline(Value *Callee) {
+ Callee = Callee->stripPointerCasts();
+ IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
+ if (!AdjustTramp ||
+ AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
+ return nullptr;
+
+ Value *TrampMem = AdjustTramp->getOperand(0);
+
+ if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
+ return IT;
+ if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
+ return IT;
+ return nullptr;
+}
+
+/// Improvements for call and invoke instructions.
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+
+ if (isAllocLikeFn(CS.getInstruction(), TLI))
+ return visitAllocSite(*CS.getInstruction());
+
+ bool Changed = false;
+
+ // Mark any parameters that are known to be non-null with the nonnull
+ // attribute. This is helpful for inlining calls to functions with null
+ // checks on their arguments.
+ SmallVector<unsigned, 4> Indices;
+ unsigned ArgNo = 0;
+
+ for (Value *V : CS.args()) {
+ if (V->getType()->isPointerTy() &&
+ !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ isKnownNonNullAt(V, CS.getInstruction(), DT))
+ Indices.push_back(ArgNo + 1);
+ ArgNo++;
+ }
+
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
+ if (!Indices.empty()) {
+ AttributeSet AS = CS.getAttributes();
+ LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AS = AS.addAttribute(Ctx, Indices,
+ Attribute::get(Ctx, Attribute::NonNull));
+ CS.setAttributes(AS);
+ Changed = true;
+ }
+
+ // If the callee is a pointer to a function, attempt to move any casts to the
+ // arguments of the call/invoke.
+ Value *Callee = CS.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ return nullptr;
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee)) {
+ // Remove the convergent attr on calls when the callee is not convergent.
+ if (CS.isConvergent() && !CalleeF->isConvergent() &&
+ !CalleeF->isIntrinsic()) {
+ DEBUG(dbgs() << "Removing convergent attr from instr "
+ << CS.getInstruction() << "\n");
+ CS.setNotConvergent();
+ return CS.getInstruction();
+ }
+
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ // Only do this for calls to a function with a body. A prototype may
+ // not actually end up matching the implementation's calling conv for a
+ // variety of reasons (e.g. it may be written in assembly).
+ !CalleeF->isDeclaration()) {
+ Instruction *OldCall = CS.getInstruction();
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+ // If OldCall does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!OldCall->getType()->isVoidTy())
+ replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall))
+ return eraseInstFromFunction(*OldCall);
+
+ // We cannot remove an invoke, because it would change the CFG, just
+ // change the callee to a null pointer.
+ cast<InvokeInst>(OldCall)->setCalledFunction(
+ Constant::getNullValue(CalleeF->getType()));
+ return nullptr;
+ }
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // If CS does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!CS.getInstruction()->getType()->isVoidTy())
+ replaceInstUsesWith(*CS.getInstruction(),
+ UndefValue::get(CS.getInstruction()->getType()));
+
+ if (isa<InvokeInst>(CS.getInstruction())) {
+ // Can't remove an invoke because we cannot change the CFG.
+ return nullptr;
+ }
+
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ return eraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (IntrinsicInst *II = findInitTrampoline(Callee))
+ return transformCallThroughTrampoline(CS, II);
+
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams();
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+ // Try to optimize the call if possible, we require DataLayout for most of
+ // this. None of these calls are seen as possibly dead so go ahead and
+ // delete the instruction now.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ Instruction *I = tryOptimizeCall(CI);
+ // If we changed something return the result, etc. Otherwise let
+ // the fallthrough check.
+ if (I) return eraseInstFromFunction(*I);
+ }
+
+ return Changed ? CS.getInstruction() : nullptr;
+}
+
+/// If the callee is a constexpr cast of a function, attempt to move the cast to
+/// the arguments of the call/invoke.
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ Function *Callee =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (!Callee)
+ return false;
+ // The prototype of thunks are a lie, don't try to directly call such
+ // functions.
+ if (Callee->hasFnAttribute("thunk"))
+ return false;
+ Instruction *Caller = CS.getInstruction();
+ const AttributeSet &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+ // be a direct call with arguments casted to the appropriate types.
+ //
+ FunctionType *FT = Callee->getFunctionType();
+ Type *OldRetTy = Caller->getType();
+ Type *NewRetTy = FT->getReturnType();
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+
+ if (NewRetTy->isStructTy())
+ return false; // TODO: Handle multiple return values.
+
+ if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
+ if (Callee->isDeclaration())
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy())
+ return false; // Cannot transform this return value.
+ }
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (User *U : II->users())
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = CS.arg_size();
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ // Prevent us turning:
+ // declare void @takes_i32_inalloca(i32* inalloca)
+ // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
+ //
+ // into:
+ // call void @takes_i32_inalloca(i32* null)
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
+ return false;
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ Type *ParamTy = FT->getParamType(i);
+ Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
+ return false; // Cannot transform this parameter value.
+
+ if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
+ overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
+ return false; // Attribute not compatible with transformed value.
+
+ if (CS.isInAllocaArgument(i))
+ return false; // Cannot transform to and from inalloca.
+
+ // If the parameter is passed as a byval argument, then we have to have a
+ // sized type and the sized type has to have the same size as the old type.
+ if (ParamTy != ActTy &&
+ CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
+ Attribute::ByVal)) {
+ PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
+ return false;
+
+ Type *CurElTy = ActTy->getPointerElementType();
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
+ return false;
+ }
+ }
+
+ if (Callee->isDeclaration()) {
+ // Do not delete arguments unless we have a function body.
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
+ return false;
+
+ // If the callee is just a declaration, don't change the varargsness of the
+ // call. We don't want to introduce a varargs call where one doesn't
+ // already exist.
+ PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+ return false;
+
+ // If both the callee and the cast type are varargs, we still have to make
+ // sure the number of fixed parameters are the same or we have the same
+ // ABI issues as if we introduce a varargs call.
+ if (FT->isVarArg() &&
+ cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+ FT->getNumParams() !=
+ cast<FunctionType>(APTy->getElementType())->getNumParams())
+ return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ unsigned Index = CallerPAL.getSlotIndex(i - 1);
+ if (Index <= FT->getNumParams())
+ break;
+
+ // Check if it has an attribute that's incompatible with varargs.
+ AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+ if (PAttrs.hasAttribute(Index, Attribute::StructRet))
+ return false;
+ }
+
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary.
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeSet, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
+
+ // Add the new return attributes.
+ if (RAttrs.hasAttributes())
+ attrVec.push_back(AttributeSet::get(Caller->getContext(),
+ AttributeSet::ReturnIndex, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ Type *ParamTy = FT->getParamType(i);
+
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
+ }
+
+ // Add any parameter attributes.
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
+ if (PAttrs.hasAttributes())
+ attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
+ PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
+ if (FT->isVarArg()) {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
+ if (PAttrs.hasAttributes())
+ attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
+ PAttrs));
+ }
+ }
+ }
+
+ AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+ if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
+ attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
+ attrVec);
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles);
+ NC->takeName(II);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ CallInst *CI = cast<CallInst>(Caller);
+ NC = Builder->CreateCall(Callee, Args, OpBundles);
+ NC->takeName(CI);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
+ NC->setDebugLoc(Caller->getDebugLoc());
+
+ // If this is an invoke instruction, we should insert it after the first
+ // non-phi, instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call, just insert cast right after the call.
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+ if (!Caller->use_empty())
+ replaceInstUsesWith(*Caller, NV);
+ else if (Caller->hasValueHandle()) {
+ if (OldRetTy == NV->getType())
+ ValueHandleBase::ValueIsRAUWd(Caller, NV);
+ else
+ // We cannot call ValueIsRAUWd with a different type, and the
+ // actual tracked value will disappear.
+ ValueHandleBase::ValueIsDeleted(Caller);
+ }
+
+ eraseInstFromFunction(*Caller);
+ return true;
+}
+
+/// Turn a call to a function created by init_trampoline / adjust_trampoline
+/// intrinsic pair into a direct call to the underlying function.
+Instruction *
+InstCombiner::transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp) {
+ Value *Callee = CS.getCalledValue();
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttributeSet &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return nullptr;
+
+ assert(Tramp &&
+ "transformCallThroughTrampoline called with incorrect CallSite.");
+
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
+
+ const AttributeSet &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ Type *NestTy = nullptr;
+ AttributeSet NestAttr;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(CS.arg_size() + 1);
+
+ SmallVector<AttributeSet, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Attrs.getRetAttributes()));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getArgOperand(2);
+ if (NestVal->getType() != NestTy)
+ NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ AttributeSet Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes(Idx)) {
+ AttrBuilder B(Attr, Idx);
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Idx + (Idx >= NestIdx), B));
+ }
+
+ ++Idx;
+ ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
+ Attrs.getFnAttributes()));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx;
+ ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FTy->isVarArg());
+ Constant *NewCallee =
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttributeSet &NewPAL =
+ AttributeSet::get(FTy->getContext(), NewAttrs);
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs, OpBundles);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+
+ return NewCaller;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF :
+ ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
new file mode 100644
index 000000000000..20556157188f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -0,0 +1,1944 @@
+//===- InstCombineCasts.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for cast operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+/// Analyze 'Val', seeing if it is a simple linear expression.
+/// If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ uint64_t &Offset) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Val->getType(), 0);
+ }
+
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ // Cannot look past anything that might overflow.
+ OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+ if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
+ Scale = 1;
+ Offset = 0;
+ return Val;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = UINT64_C(1) << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
+
+/// If we find a cast of an allocation instruction, try to eliminate the cast by
+/// moving the type information into the alloc.
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
+ PointerType *PTy = cast<PointerType>(CI.getType());
+
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(&AI);
+
+ // Get the type really allocated and the type casted to.
+ Type *AllocElTy = AI.getAllocatedType();
+ Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
+
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return nullptr;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds.
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
+
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
+
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ uint64_t ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+ // If we can now satisfy the modulus, by using a non-1 scale, we really can
+ // do the xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = nullptr;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements);
+ }
+
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
+ Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off);
+ }
+
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
+ New->takeName(&AI);
+ New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
+
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ if (!AI.hasOneUse()) {
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
+ replaceInstUsesWith(AI, NewCast);
+ }
+ return replaceInstUsesWith(CI, New);
+}
+
+/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
+/// true for, actually insert the code to evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+ // If we got a constantexpr back, try to simplify it with DL info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(CE, DL, TLI);
+ return C;
+ }
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = nullptr;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ llvm_unreachable("Unreachable!");
+ }
+
+ Res->takeName(I);
+ return InsertNewInstWith(Res, *I);
+}
+
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
+ Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ Type *MidTy = CI->getType(); // B from above
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy, SrcIntPtrTy, MidIntPtrTy,
+ DstIntPtrTy);
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
+ (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
+
+/// Return true if the cast from "V to Ty" actually results in any code being
+/// generated and is interesting to optimize out.
+/// If the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opc, Ty, DL))
+ return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
+ return true;
+}
+
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+ // eliminate it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src)) {
+ // We don't do this if this would create a PHI node with an illegal type if
+ // it is currently legal.
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
+ ShouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
+
+ return nullptr;
+}
+
+/// Return true if we can evaluate the specified expression tree as type Ty
+/// instead of its larger type, and arrive with the same value.
+/// This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V. We should return true if trunc(V)
+/// can be computed by computing V in the smaller type. If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+ Instruction *CxtI) {
+ // We can always evaluate constants in another type.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ Type *OrigTy = V->getType();
+
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
+ IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ }
+ }
+ break;
+ }
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (CI->getLimitedValue(BitWidth) < BitWidth)
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (IC.MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+ }
+ }
+ break;
+ case Instruction::Trunc:
+ // trunc(trunc(x)) -> trunc(x)
+ return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
+ canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (Value *IncValue : PN->incoming_values())
+ if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+/// Given a vector that is bitcast to an integer, optionally logically
+/// right-shifted, and truncated, convert it to an extractelement.
+/// Example (big endian):
+/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
+/// --->
+/// extractelement <4 x i32> %X, 1
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
+ const DataLayout &DL) {
+ Value *TruncOp = Trunc.getOperand(0);
+ Type *DestType = Trunc.getType();
+ if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
+ return nullptr;
+
+ Value *VecInput = nullptr;
+ ConstantInt *ShiftVal = nullptr;
+ if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
+ m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShiftVal)))) ||
+ !isa<VectorType>(VecInput->getType()))
+ return nullptr;
+
+ VectorType *VecType = cast<VectorType>(VecInput->getType());
+ unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+ unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+ unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
+
+ if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+ return nullptr;
+
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to a vector type that we can extract from.
+ unsigned NumVecElts = VecWidth / DestWidth;
+ if (VecType->getElementType() != DestType) {
+ VecType = VectorType::get(DestType, NumVecElts);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+ }
+
+ unsigned Elt = ShiftAmount / DestWidth;
+ if (DL.isBigEndian())
+ Elt = NumVecElts - 1 - Elt;
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // Test if the trunc is the user of a select which is part of a
+ // minimum or maximum operation. If so, don't do any more simplification.
+ // Even simplifying demanded bits can break the canonical form of a
+ // min/max.
+ Value *LHS, *RHS;
+ if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
+ if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
+ return nullptr;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+ // Attempt to truncate the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ canEvaluateTruncated(Src, DestTy, *this, &CI)) {
+
+ // If this cast is a truncate, evaluting in a different type always
+ // eliminates the cast, so it is always a win.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+ return replaceInstUsesWith(CI, Res);
+ }
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+ if (DestTy->getScalarSizeInBits() == 1) {
+ Constant *One = ConstantInt::get(SrcTy, 1);
+ Src = Builder->CreateAnd(Src, One);
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = nullptr; ConstantInt *Cst = nullptr;
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return replaceInstUsesWith(CI, Constant::getNullValue(DestTy));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, DestTy, false);
+ }
+
+ // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+ // conversion.
+ // It works because bits coming from sign extension have the same value as
+ // the sign bit of the original value; performing ashr instead of lshr
+ // generates bits of the same value as the sign bit.
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+ const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+ // This optimization can be only performed when zero bits generated by
+ // the original lshr aren't pulled into the value after truncation, so we
+ // can only shift by values smaller than the size of destination type (in
+ // bits).
+ if (Cst->getValue().ult(ASize)) {
+ Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ }
+ }
+
+ // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+ // type isn't non-native.
+ if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
+ ShouldChangeType(SrcTy, DestTy) &&
+ match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+ Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
+ return BinaryOperator::CreateAnd(NewTrunc,
+ ConstantExpr::getTrunc(Cst, DestTy));
+ }
+
+ if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+ return I;
+
+ return nullptr;
+}
+
+/// Transform (zext icmp) to bitwise / integer operations in order to eliminate
+/// the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+ // If we are just checking for a icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits() - 1);
+ In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName() + ".not");
+ }
+
+ return replaceInstUsesWith(CI, In);
+ }
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+ isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
+ In->getName() + ".lobit");
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One);
+ }
+
+ if (CI.getType() == In->getType())
+ return replaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+ // It is also profitable to transform icmp eq into not(xor(A, B)) because that
+ // may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI);
+ computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
+ if (UnknownBit.countPopulation() == 1) {
+ if (!DoXform) return ICI;
+
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return replaceInstUsesWith(CI, Result);
+ }
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/// Determine if the specified value can be computed in the specified wider type
+/// and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
+ InstCombiner &IC, Instruction *CxtI) {
+ BitsToClear = 0;
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If the input is a truncate from the destination type, we can trivially
+ // eliminate it.
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode(), Tmp;
+ switch (Opc) {
+ case Instruction::ZExt: // zext(zext(x)) -> zext(x).
+ case Instruction::SExt: // zext(sext(x)) -> sext(x).
+ case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
+
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (IC.MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear),
+ 0, CxtI))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+
+ case Instruction::Shl:
+ // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
+ // upper bits we can reduce BitsToClear by the shift amount.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ return false;
+ uint64_t ShiftAmt = Amt->getZExtValue();
+ BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
+ return true;
+ }
+ return false;
+ case Instruction::LShr:
+ // We can promote lshr(x, cst) if we can promote x. This requires the
+ // ultimate 'and' to clear out the high zero bits we're clearing out though.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
+ case Instruction::Select:
+ if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
+ return false;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+ // If this zero extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
+ return nullptr;
+
+ // If one of the common conversion will work, do it.
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ unsigned BitsToClear;
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid zero extend: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with zeros, just replace this
+ // cast with the result.
+ if (MaskedValueIsZero(Res,
+ APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept),
+ 0, &CI))
+ return replaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(Res->getType(),
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // TODO: Subsume this into EvaluateInDifferentType.
+
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ return new ZExtInst(And, CI.getType());
+ }
+
+ if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType());
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc,
+ ConstantInt::get(Trunc->getType(),
+ AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ // zext(trunc(X) & C) -> (X & zext(C)).
+ Constant *C;
+ Value *X;
+ if (SrcI &&
+ match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
+ X->getType() == CI.getType())
+ return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
+
+ // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
+ Value *And;
+ if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
+ match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
+ X->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC);
+ }
+
+ // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
+ if (SrcI && SrcI->hasOneUse() &&
+ SrcI->getType()->getScalarType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ Value *New = Builder->CreateZExt(X, CI.getType());
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+
+ return nullptr;
+}
+
+/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+ Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+
+ // Don't bother if Op1 isn't of vector or integer type.
+ if (!Op1->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
+ // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isNullValue()) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+ Value *Sh = ConstantInt::get(Op0->getType(),
+ Op0->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/);
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return replaceInstUsesWith(CI, In);
+ }
+ }
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // If we know that only one bit of the LHS of the icmp can be set and we
+ // have an equality comparison with zero or a power of 2, we can transform
+ // the icmp and sext into bitwise/integer operations.
+ if (ICI->hasOneUse() &&
+ ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+ unsigned BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) {
+ Value *In = ICI->getOperand(0);
+
+ // If the icmp tests for a known zero bit we can constant fold it.
+ if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+ Value *V = Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getAllOnesValue(CI.getType()) :
+ ConstantInt::getNullValue(CI.getType());
+ return replaceInstUsesWith(CI, V);
+ }
+
+ if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+ // sext ((x & 2^n) == 0) -> (x >> n) - 1
+ // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+ unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ // Perform a right shift to place the desired bit in the LSB.
+ if (ShiftAmt)
+ In = Builder->CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // At this point "In" is either 1 or 0. Subtract 1 to turn
+ // {1, 0} -> {0, -1}.
+ In = Builder->CreateAdd(In,
+ ConstantInt::getAllOnesValue(In->getType()),
+ "sext");
+ } else {
+ // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
+ // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
+ unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ // Perform a left shift to place the desired bit in the MSB.
+ if (ShiftAmt)
+ In = Builder->CreateShl(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // Distribute the bit over the whole bit width.
+ In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+ BitWidth - 1), "sext");
+ }
+
+ if (CI.getType() == In->getType())
+ return replaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/// Return true if we can take the specified value and return it as type Ty
+/// without inserting any new casts and without changing the value of the common
+/// low bits. This is used by code that tries to promote integer operations to
+/// a wider types will allow us to eliminate the extension.
+///
+/// This function works on both vectors and scalars.
+///
+static bool canEvaluateSExtd(Value *V, Type *Ty) {
+ assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "Can't sign extend type to a smaller type");
+ // If this is a constant, it can be trivially promoted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is a truncate from the dest type, we can trivially eliminate it.
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SExt: // sext(sext(x)) -> sext(x)
+ case Instruction::ZExt: // sext(zext(x)) -> zext(x)
+ case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operators can all arbitrarily be extended if their inputs can.
+ return canEvaluateSExtd(I->getOperand(0), Ty) &&
+ canEvaluateSExtd(I->getOperand(1), Ty);
+
+ //case Instruction::Shl: TODO
+ //case Instruction::LShr: TODO
+
+ case Instruction::Select:
+ return canEvaluateSExtd(I->getOperand(1), Ty) &&
+ canEvaluateSExtd(I->getOperand(2), Ty);
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (Value *IncValue : PN->incoming_values())
+ if (!canEvaluateSExtd(IncValue, Ty)) return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
+ return nullptr;
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // If we know that the value being extended is positive, we can use a zext
+ // instead.
+ bool KnownZero, KnownOne;
+ ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
+ if (KnownZero) {
+ Value *ZExt = Builder->CreateZExt(Src, DestTy);
+ return replaceInstUsesWith(CI, ZExt);
+ }
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ canEvaluateSExtd(Src, DestTy)) {
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid sign extend: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
+ return replaceInstUsesWith(CI, Res);
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ ShAmt);
+ }
+
+ // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // into shifts.
+ if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+ if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+ return BinaryOperator::CreateAShr(Res, ShAmt);
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformSExtICmp(ICI, CI);
+
+ // If the input is a shl/ashr pair of a same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = nullptr;
+ // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+ ConstantInt *BA = nullptr, *CA = nullptr;
+ if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && A->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ return BinaryOperator::CreateAShr(A, ShAmtV);
+ }
+
+ return nullptr;
+}
+
+
+/// Return a Constant* for the specified floating-point constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(CFP->getContext(), F);
+ return nullptr;
+}
+
+/// If this is a floating-point extension instruction, look
+/// through it until we get the source value.
+static Value *lookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return lookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+ return V; // No constant folding of this.
+ // See if the value can be truncated to half and then reextended.
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf))
+ return V;
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType()->isDoubleTy())
+ return V; // Won't shrink.
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+ // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
+ // simplify this expression to avoid one or more of the trunc/extend
+ // operations if we can do so without changing the numerical results.
+ //
+ // The exact manner in which the widths of the operands interact to limit
+ // what we can and cannot do safely varies from operation to operation, and
+ // is explained below in the various case statements.
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1));
+ unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
+ unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth();
+ unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth();
+ unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
+ unsigned DstWidth = CI.getType()->getFPMantissaWidth();
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ // For addition and subtraction, the infinitely precise result can
+ // essentially be arbitrarily wide; proving that double rounding
+ // will not occur because the result of OpI is exact (as we will for
+ // FMul, for example) is hopeless. However, we *can* nonetheless
+ // frequently know that double rounding cannot occur (or that it is
+ // innocuous) by taking advantage of the specific structure of
+ // infinitely-precise results that admit double rounding.
+ //
+ // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
+ // to represent both sources, we can guarantee that the double
+ // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
+ // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
+ // for proof of this fact).
+ //
+ // Note: Figueroa does not consider the case where DstFormat !=
+ // SrcFormat. It's possible (likely even!) that this analysis
+ // could be tightened for those cases, but they are rare (the main
+ // case of interest here is (float)((double)float + float)).
+ if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FMul:
+ // For multiplication, the infinitely precise result has at most
+ // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
+ // that such a value can be exactly represented, then no double
+ // rounding can possibly occur; we can safely perform the operation
+ // in the destination format if it can represent both sources.
+ if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::CreateFMul(LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FDiv:
+ // For division, we use again use the bound from Figueroa's
+ // dissertation. I am entirely certain that this bound can be
+ // tightened in the unbalanced operand case by an analysis based on
+ // the diophantine rational approximation bound, but the well-known
+ // condition used here is a good conservative first pass.
+ // TODO: Tighten bound via rigorous analysis of the unbalanced case.
+ if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::CreateFDiv(LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FRem:
+ // Remainder is straightforward. Remainder is always exact, so the
+ // type of OpI doesn't enter into things at all. We simply evaluate
+ // in whichever source type is larger, then convert to the
+ // destination type.
+ if (SrcWidth == OpWidth)
+ break;
+ if (LHSWidth < SrcWidth)
+ LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType());
+ else if (RHSWidth <= SrcWidth)
+ RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType());
+ if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) {
+ Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig);
+ if (Instruction *RI = dyn_cast<Instruction>(ExactResult))
+ RI->copyFastMathFlags(OpI);
+ return CastInst::CreateFPCast(ExactResult, CI.getType());
+ }
+ }
+
+ // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+ if (BinaryOperator::isFNeg(OpI)) {
+ Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
+ CI.getType());
+ Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ }
+
+ // (fptrunc (select cond, R1, Cst)) -->
+ // (select cond, (fptrunc R1), (fptrunc Cst))
+ //
+ // - but only if this isn't part of a min/max operation, else we'll
+ // ruin min/max canonical form which is to have the select and
+ // compare's operands be of the same type with no casts to look through.
+ Value *LHS, *RHS;
+ SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
+ if (SI &&
+ (isa<ConstantFP>(SI->getOperand(1)) ||
+ isa<ConstantFP>(SI->getOperand(2))) &&
+ matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) {
+ Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
+ CI.getType());
+ Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
+ CI.getType());
+ return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::fabs: {
+ // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+ Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+ CI.getType());
+ Type *IntrinsicType[] = { CI.getType() };
+ Function *Overload = Intrinsic::getDeclaration(
+ CI.getModule(), II->getIntrinsicID(), IntrinsicType);
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+
+ Value *Args[] = { InnerTrunc };
+ return CallInst::Create(Overload, Args, OpBundles, II->getName());
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// This is safe if the intermediate type has enough bits in its mantissa to
+// accurately represent all values of X. For example, this won't work with
+// i64 -> float -> i64.
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+ if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+ return nullptr;
+ Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+ Value *SrcI = OpI->getOperand(0);
+ Type *FITy = FI.getType();
+ Type *OpITy = OpI->getType();
+ Type *SrcTy = SrcI->getType();
+ bool IsInputSigned = isa<SIToFPInst>(OpI);
+ bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
+ int ActualSize = std::min(InputSize, OutputSize);
+
+ if (ActualSize <= OpITy->getFPMantissaWidth()) {
+ if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+ if (IsInputSigned && IsOutputSigned)
+ return new SExtInst(SrcI, FITy);
+ return new ZExtInst(SrcI, FITy);
+ }
+ if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+ return new TruncInst(SrcI, FITy);
+ if (SrcTy == FITy)
+ return replaceInstUsesWith(FI, SrcI);
+ return new BitCastInst(SrcI, FITy);
+ }
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (!OpI)
+ return commonCastTransforms(FI);
+
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (!OpI)
+ return commonCastTransforms(FI);
+
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is not the intptr_t type for this target, do a
+ // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
+ // cast to be exposed to other transforms.
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ return nullptr;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If casting the result of a getelementptr instruction with no offset, turn
+ // this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices() &&
+ // If CI is an addrspacecast and GEP changes the poiner type, merging
+ // GEP into CI would undo canonicalizing addrspacecast with different
+ // pointer types, causing infinite loops.
+ (!isa<AddrSpaceCastInst>(CI) ||
+ GEP->getType() == GEP->getPointerOperand()->getType())) {
+ // Changing the cast operand is usually not a good idea but it is safe
+ // here because the pointer operand is being replaced with another
+ // pointer operand so the opcode doesn't need to change.
+ Worklist.Add(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is not the intptr_t type for this target,
+ // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
+ // to be exposed to other transforms.
+
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
+
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
+
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
+}
+
+/// This input value (which is known to have vector type) is being zero extended
+/// or truncated to the specified vector type.
+/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The input types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return nullptr;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<uint32_t, 16> ShuffleMask;
+ Value *V2;
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(i);
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // from a constant zero.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(i);
+
+ // The excess elements reference the first element of the zero input.
+ for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
+ ShuffleMask.push_back(SrcElts);
+ }
+
+ return new ShuffleVectorInst(InVal, V2,
+ ConstantDataVector::get(V2->getContext(),
+ ShuffleMask));
+}
+
+static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// V is a value which is inserted into a vector of VecEltTy.
+/// Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool collectInsertionElements(Value *V, unsigned Shift,
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
+ assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+ "Shift should be a multiple of the element type size");
+
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win, try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+ if (isBigEndian)
+ ElementIndex = Elements.size() - ElementIndex - 1;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (Elements[ElementIndex])
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ Shift, Elements, VecEltTy, isBigEndian);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned ShiftI = Shift+i*ElementSize;
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ ShiftI));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
+ case Instruction::Or:
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!CI) return false;
+ Shift += CI->getZExtValue();
+ if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
+ }
+
+ }
+}
+
+
+/// If the input is an 'or' instruction, we may be doing shifts and ors to
+/// assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!collectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
+ return nullptr;
+
+ // If we succeeded, we know that all of the element are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (!Elements[i]) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
+/// vector followed by extract element. The backend tends to handle bitcasts of
+/// vectors better than bitcasts of scalars because vector registers are
+/// usually not type-specific like scalar integer or scalar floating-point.
+static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
+ InstCombiner &IC,
+ const DataLayout &DL) {
+ // TODO: Create and use a pattern matcher for ExtractElementInst.
+ auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+ if (!ExtElt || !ExtElt->hasOneUse())
+ return nullptr;
+
+ // The bitcast must be to a vectorizable type, otherwise we can't make a new
+ // type to extract from.
+ Type *DestType = BitCast.getType();
+ if (!VectorType::isValidElementType(DestType))
+ return nullptr;
+
+ unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+ auto *NewVecType = VectorType::get(DestType, NumElts);
+ auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
+ NewVecType, "bc");
+ return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType();
+ Type *DestTy = CI.getType();
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+ if (DestTy == Src->getType())
+ return replaceInstUsesWith(CI, Src);
+
+ if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ Type *DstElTy = DstPTy->getElementType();
+ Type *SrcElTy = SrcPTy->getElementType();
+
+ // If we are casting a alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // When the type pointed to is not sized the cast cannot be
+ // turned into a gep.
+ Type *PointeeType =
+ cast<PointerType>(Src->getType()->getScalarType())->getElementType();
+ if (!PointeeType->isSized())
+ return nullptr;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(0U);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0));
+ return GetElementPtrInst::CreateInBounds(Src, Idxs);
+ }
+ }
+
+ if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
+ return replaceInstUsesWith(CI, V);
+ }
+ }
+
+ if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1) {
+ // If our destination is not a vector, then make this a straight
+ // scalar-scalar cast.
+ if (!DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+
+ // Otherwise, see if our source is an insert. If so, then use the scalar
+ // component directly.
+ if (InsertElementInst *IEI =
+ dyn_cast<InsertElementInst>(CI.getOperand(0)))
+ return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
+ DestTy);
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ // Okay, we have (bitcast (shuffle ..)). Check to see if this is
+ // a bitcast to a vector with the same # elts.
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ SVI->getOperand(0)->getType()->getVectorNumElements()) {
+ BitCastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+
+ if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+ return I;
+
+ if (SrcTy->isPointerTy())
+ return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ // If the destination pointer element type is not the same as the source's
+ // first do a bitcast to the destination type, and then the addrspacecast.
+ // This allows the cast to be exposed to other transforms.
+ Value *Src = CI.getOperand(0);
+ PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
+ PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
+
+ Type *DestElemTy = DestTy->getElementType();
+ if (SrcTy->getElementType() != DestElemTy) {
+ Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
+ if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) {
+ // Handle vectors of pointers.
+ MidTy = VectorType::get(MidTy, VT->getNumElements());
+ }
+
+ Value *NewBitCast = Builder->CreateBitCast(Src, MidTy);
+ return new AddrSpaceCastInst(NewBitCast, CI.getType());
+ }
+
+ return commonPointerCastTransforms(CI);
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 000000000000..961497fe3c2d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,4715 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+// How many times is a select replaced by one of its operands?
+STATISTIC(NumSel, "Number of select opts");
+
+// Initialization Routines
+
+static ConstantInt *getOne(Constant *C) {
+ return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ult(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ return Result->getValue().slt(In1->getValue());
+}
+
+/// Compute Result = In1+In2, returning true if the result overflowed for this
+/// type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ugt(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().slt(In1->getValue());
+
+ return Result->getValue().sgt(In1->getValue());
+}
+
+/// Compute Result = In1-In2, returning true if the result overflowed for this
+/// type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+/// Given an icmp instruction, return true if any use of this comparison is a
+/// branch on sign bit comparison.
+static bool isBranchOnSignBitCheck(ICmpInst &I, bool isSignBit) {
+ for (auto *U : I.users())
+ if (isa<BranchInst>(U))
+ return isSignBit;
+ return false;
+}
+
+/// Given an exploded icmp instruction, return true if the comparison only
+/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
+/// result of the comparison is true when the input value is signed.
+static bool isSignBitCheck(ICmpInst::Predicate Pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (Pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->isMaxValue(true);
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &Pred, const ConstantInt *RHS) {
+ if (!ICmpInst::isSigned(Pred))
+ return false;
+
+ if (RHS->isZero())
+ return ICmpInst::isRelational(Pred);
+
+ if (RHS->isOne()) {
+ if (Pred == ICmpInst::ICMP_SLT) {
+ Pred = ICmpInst::ICMP_SLE;
+ return true;
+ }
+ } else if (RHS->isAllOnesValue()) {
+ if (Pred == ICmpInst::ICMP_SGT) {
+ Pred = ICmpInst::ICMP_SGE;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Return true if the constant is of the form 1+0+. This is the same as
+/// lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// Given a signed integer type and a set of known zero and one bits, compute
+/// the maximum and minimum values that could have the specified known zero and
+/// known one bits, returning them in Min/Max.
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.setBit(Min.getBitWidth()-1);
+ Max.clearBit(Max.getBitWidth()-1);
+ }
+}
+
+/// Given an unsigned integer type and a set of known zero and one bits, compute
+/// the maximum and minimum values that could have the specified known zero and
+/// known one bits, returning them in Min/Max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+/// This is called when we see this pattern:
+/// cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+ CmpInst &ICI, ConstantInt *AndCst) {
+ Constant *Init = GV->getInitializer();
+ if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ return nullptr;
+
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+ if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays.
+
+ // There are many forms of this optimization we can handle, for now, just do
+ // the simple index into a single-dimensional array.
+ //
+ // Require: GEP GV, 0, i {{, constant indices}}
+ if (GEP->getNumOperands() < 3 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return nullptr;
+
+ // Check that indices after the variable are constants and in-range for the
+ // type they index. Collect the indices. This is typically for arrays of
+ // structs.
+ SmallVector<unsigned, 4> LaterIndices;
+
+ Type *EltTy = Init->getType()->getArrayElementType();
+ for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Idx) return nullptr; // Variable index.
+
+ uint64_t IdxVal = Idx->getZExtValue();
+ if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
+
+ if (StructType *STy = dyn_cast<StructType>(EltTy))
+ EltTy = STy->getElementType(IdxVal);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ if (IdxVal >= ATy->getNumElements()) return nullptr;
+ EltTy = ATy->getElementType();
+ } else {
+ return nullptr; // Unknown type.
+ }
+
+ LaterIndices.push_back(IdxVal);
+ }
+
+ enum { Overdefined = -3, Undefined = -2 };
+
+ // Variables for our state machines.
+
+ // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+ // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+ // and 87 is the second (and last) index. FirstTrueElement is -2 when
+ // undefined, otherwise set to the first true element. SecondTrueElement is
+ // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+ int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+ // form "i != 47 & i != 87". Same state transitions as for true elements.
+ int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+ /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+ /// define a state machine that triggers for ranges of values that the index
+ /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+ /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+ /// index in the range (inclusive). We use -2 for undefined here because we
+ /// use relative comparisons and don't want 0-1 to match -1.
+ int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+ // MagicBitvector - This is a magic bitvector where we set a bit if the
+ // comparison is true for element 'i'. If there are 64 elements or less in
+ // the array, this will fully represent all the comparison results.
+ uint64_t MagicBitvector = 0;
+
+ // Scan the array and see if one of our patterns matches.
+ Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+ for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
+ Constant *Elt = Init->getAggregateElement(i);
+ if (!Elt) return nullptr;
+
+ // If this is indexing an array of structures, get the structure element.
+ if (!LaterIndices.empty())
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
+
+ // If the element is masked, handle it.
+ if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+ // Find out if the comparison would be true or false for the i'th element.
+ Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+ CompareRHS, DL, TLI);
+ // If the result is undef for this element, ignore it.
+ if (isa<UndefValue>(C)) {
+ // Extend range state machines to cover this element in case there is an
+ // undef in the middle of the range.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ continue;
+ }
+
+ // If we can't compute the result for any of the elements, we have to give
+ // up evaluating the entire conditional.
+ if (!isa<ConstantInt>(C)) return nullptr;
+
+ // Otherwise, we know if the comparison is true or false for this element,
+ // update our state machines.
+ bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+ // State machine for single/double/range index comparison.
+ if (IsTrueForElt) {
+ // Update the TrueElement state machine.
+ if (FirstTrueElement == Undefined)
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
+ else {
+ // Update double-compare state machine.
+ if (SecondTrueElement == Undefined)
+ SecondTrueElement = i;
+ else
+ SecondTrueElement = Overdefined;
+
+ // Update range state machine.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ else
+ TrueRangeEnd = Overdefined;
+ }
+ } else {
+ // Update the FalseElement state machine.
+ if (FirstFalseElement == Undefined)
+ FirstFalseElement = FalseRangeEnd = i; // First false element.
+ else {
+ // Update double-compare state machine.
+ if (SecondFalseElement == Undefined)
+ SecondFalseElement = i;
+ else
+ SecondFalseElement = Overdefined;
+
+ // Update range state machine.
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ else
+ FalseRangeEnd = Overdefined;
+ }
+ }
+
+ // If this element is in range, update our magic bitvector.
+ if (i < 64 && IsTrueForElt)
+ MagicBitvector |= 1ULL << i;
+
+ // If all of our states become overdefined, bail out early. Since the
+ // predicate is expensive, only check it every 8 elements. This is only
+ // really useful for really huge arrays.
+ if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+ SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+ FalseRangeEnd == Overdefined)
+ return nullptr;
+ }
+
+ // Now that we've scanned the entire array, emit our new comparison(s). We
+ // order the state machines in complexity of the generated code.
+ Value *Idx = GEP->getOperand(2);
+
+ // If the index is larger than the pointer size of the target, truncate the
+ // index down like the GEP would do implicitly. We don't have to do this for
+ // an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds()) {
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
+ unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
+ if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
+ Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+ }
+
+ // If the comparison is only true for one or two elements, emit direct
+ // comparisons.
+ if (SecondTrueElement != Overdefined) {
+ // None true -> false.
+ if (FirstTrueElement == Undefined)
+ return replaceInstUsesWith(ICI, Builder->getFalse());
+
+ Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+ // True for one element -> 'i == 47'.
+ if (SecondTrueElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+ // True for two elements -> 'i == 47 | i == 72'.
+ Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+ Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ return BinaryOperator::CreateOr(C1, C2);
+ }
+
+ // If the comparison is only false for one or two elements, emit direct
+ // comparisons.
+ if (SecondFalseElement != Overdefined) {
+ // None false -> true.
+ if (FirstFalseElement == Undefined)
+ return replaceInstUsesWith(ICI, Builder->getTrue());
+
+ Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+ // False for one element -> 'i != 47'.
+ if (SecondFalseElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+ // False for two elements -> 'i != 47 & i != 72'.
+ Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ return BinaryOperator::CreateAnd(C1, C2);
+ }
+
+ // If the comparison can be replaced with a range comparison for the elements
+ // where it is true, emit the range check.
+ if (TrueRangeEnd != Overdefined) {
+ assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+ // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+ if (FirstTrueElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ TrueRangeEnd-FirstTrueElement+1);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+ }
+
+ // False range check.
+ if (FalseRangeEnd != Overdefined) {
+ assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+ // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+ if (FirstFalseElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ FalseRangeEnd-FirstFalseElement);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+ }
+
+ // If a magic bitvector captures the entire comparison state
+ // of this load, replace it with computation that does:
+ // ((magic_cst >> i) & 1) != 0
+ {
+ Type *Ty = nullptr;
+
+ // Look for an appropriate type:
+ // - The type of Idx if the magic fits
+ // - The smallest fitting legal type if we have a DataLayout
+ // - Default to i32
+ if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+ Ty = Idx->getType();
+ else
+ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+
+ if (Ty) {
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
+ }
+
+ return nullptr;
+}
+
+/// Return a value that can be used to compare the *offset* implied by a GEP to
+/// zero. For example, if we have &A[i], we want to return 'i' for
+/// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
+/// are involved. The above expression would also be legal to codegen as
+/// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+ const DataLayout &DL) {
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return nullptr;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return nullptr;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
+ unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
+ if (Offset == 0) {
+ // Cast to intptrty in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
+ VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
+ }
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return nullptr;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
+ true /*Signed*/);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
+}
+
+/// Returns true if we can rewrite Start as a GEP with pointer Base
+/// and some integer offset. The nodes that need to be re-written
+/// for this transformation will be added to Explored.
+static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
+ const DataLayout &DL,
+ SetVector<Value *> &Explored) {
+ SmallVector<Value *, 16> WorkList(1, Start);
+ Explored.insert(Base);
+
+ // The following traversal gives us an order which can be used
+ // when doing the final transformation. Since in the final
+ // transformation we create the PHI replacement instructions first,
+ // we don't have to get them in any particular order.
+ //
+ // However, for other instructions we will have to traverse the
+ // operands of an instruction first, which means that we have to
+ // do a post-order traversal.
+ while (!WorkList.empty()) {
+ SetVector<PHINode *> PHIs;
+
+ while (!WorkList.empty()) {
+ if (Explored.size() >= 100)
+ return false;
+
+ Value *V = WorkList.back();
+
+ if (Explored.count(V) != 0) {
+ WorkList.pop_back();
+ continue;
+ }
+
+ if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
+ !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
+ // We've found some value that we can't explore which is different from
+ // the base. Therefore we can't do this transformation.
+ return false;
+
+ if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
+ auto *CI = dyn_cast<CastInst>(V);
+ if (!CI->isNoopCast(DL))
+ return false;
+
+ if (Explored.count(CI->getOperand(0)) == 0)
+ WorkList.push_back(CI->getOperand(0));
+ }
+
+ if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+ // We're limiting the GEP to having one index. This will preserve
+ // the original pointer type. We could handle more cases in the
+ // future.
+ if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
+ GEP->getType() != Start->getType())
+ return false;
+
+ if (Explored.count(GEP->getOperand(0)) == 0)
+ WorkList.push_back(GEP->getOperand(0));
+ }
+
+ if (WorkList.back() == V) {
+ WorkList.pop_back();
+ // We've finished visiting this node, mark it as such.
+ Explored.insert(V);
+ }
+
+ if (auto *PN = dyn_cast<PHINode>(V)) {
+ // We cannot transform PHIs on unsplittable basic blocks.
+ if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
+ return false;
+ Explored.insert(PN);
+ PHIs.insert(PN);
+ }
+ }
+
+ // Explore the PHI nodes further.
+ for (auto *PN : PHIs)
+ for (Value *Op : PN->incoming_values())
+ if (Explored.count(Op) == 0)
+ WorkList.push_back(Op);
+ }
+
+ // Make sure that we can do this. Since we can't insert GEPs in a basic
+ // block before a PHI node, we can't easily do this transformation if
+ // we have PHI node users of transformed instructions.
+ for (Value *Val : Explored) {
+ for (Value *Use : Val->uses()) {
+
+ auto *PHI = dyn_cast<PHINode>(Use);
+ auto *Inst = dyn_cast<Instruction>(Val);
+
+ if (Inst == Base || Inst == PHI || !Inst || !PHI ||
+ Explored.count(PHI) == 0)
+ continue;
+
+ if (PHI->getParent() == Inst->getParent())
+ return false;
+ }
+ }
+ return true;
+}
+
+// Sets the appropriate insert point on Builder where we can add
+// a replacement Instruction for V (if that is possible).
+static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
+ bool Before = true) {
+ if (auto *PHI = dyn_cast<PHINode>(V)) {
+ Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
+ return;
+ }
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (!Before)
+ I = &*std::next(I->getIterator());
+ Builder.SetInsertPoint(I);
+ return;
+ }
+ if (auto *A = dyn_cast<Argument>(V)) {
+ // Set the insertion point in the entry block.
+ BasicBlock &Entry = A->getParent()->getEntryBlock();
+ Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
+ return;
+ }
+ // Otherwise, this is a constant and we don't need to set a new
+ // insertion point.
+ assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
+}
+
+/// Returns a re-written value of Start as an indexed GEP using Base as a
+/// pointer.
+static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
+ const DataLayout &DL,
+ SetVector<Value *> &Explored) {
+ // Perform all the substitutions. This is a bit tricky because we can
+ // have cycles in our use-def chains.
+ // 1. Create the PHI nodes without any incoming values.
+ // 2. Create all the other values.
+ // 3. Add the edges for the PHI nodes.
+ // 4. Emit GEPs to get the original pointers.
+ // 5. Remove the original instructions.
+ Type *IndexType = IntegerType::get(
+ Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType()));
+
+ DenseMap<Value *, Value *> NewInsts;
+ NewInsts[Base] = ConstantInt::getNullValue(IndexType);
+
+ // Create the new PHI nodes, without adding any incoming values.
+ for (Value *Val : Explored) {
+ if (Val == Base)
+ continue;
+ // Create empty phi nodes. This avoids cyclic dependencies when creating
+ // the remaining instructions.
+ if (auto *PHI = dyn_cast<PHINode>(Val))
+ NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
+ PHI->getName() + ".idx", PHI);
+ }
+ IRBuilder<> Builder(Base->getContext());
+
+ // Create all the other instructions.
+ for (Value *Val : Explored) {
+
+ if (NewInsts.find(Val) != NewInsts.end())
+ continue;
+
+ if (auto *CI = dyn_cast<CastInst>(Val)) {
+ NewInsts[CI] = NewInsts[CI->getOperand(0)];
+ continue;
+ }
+ if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
+ Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
+ : GEP->getOperand(1);
+ setInsertionPoint(Builder, GEP);
+ // Indices might need to be sign extended. GEPs will magically do
+ // this, but we need to do it ourselves here.
+ if (Index->getType()->getScalarSizeInBits() !=
+ NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
+ Index = Builder.CreateSExtOrTrunc(
+ Index, NewInsts[GEP->getOperand(0)]->getType(),
+ GEP->getOperand(0)->getName() + ".sext");
+ }
+
+ auto *Op = NewInsts[GEP->getOperand(0)];
+ if (isa<ConstantInt>(Op) && dyn_cast<ConstantInt>(Op)->isZero())
+ NewInsts[GEP] = Index;
+ else
+ NewInsts[GEP] = Builder.CreateNSWAdd(
+ Op, Index, GEP->getOperand(0)->getName() + ".add");
+ continue;
+ }
+ if (isa<PHINode>(Val))
+ continue;
+
+ llvm_unreachable("Unexpected instruction type");
+ }
+
+ // Add the incoming values to the PHI nodes.
+ for (Value *Val : Explored) {
+ if (Val == Base)
+ continue;
+ // All the instructions have been created, we can now add edges to the
+ // phi nodes.
+ if (auto *PHI = dyn_cast<PHINode>(Val)) {
+ PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
+ for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
+ Value *NewIncoming = PHI->getIncomingValue(I);
+
+ if (NewInsts.find(NewIncoming) != NewInsts.end())
+ NewIncoming = NewInsts[NewIncoming];
+
+ NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
+ }
+ }
+ }
+
+ for (Value *Val : Explored) {
+ if (Val == Base)
+ continue;
+
+ // Depending on the type, for external users we have to emit
+ // a GEP or a GEP + ptrtoint.
+ setInsertionPoint(Builder, Val, false);
+
+ // If required, create an inttoptr instruction for Base.
+ Value *NewBase = Base;
+ if (!Base->getType()->isPointerTy())
+ NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
+ Start->getName() + "to.ptr");
+
+ Value *GEP = Builder.CreateInBoundsGEP(
+ Start->getType()->getPointerElementType(), NewBase,
+ makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");
+
+ if (!Val->getType()->isPointerTy()) {
+ Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
+ Val->getName() + ".conv");
+ GEP = Cast;
+ }
+ Val->replaceAllUsesWith(GEP);
+ }
+
+ return NewInsts[Start];
+}
+
+/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
+/// the input Value as a constant indexed GEP. Returns a pair containing
+/// the GEPs Pointer and Index.
+static std::pair<Value *, Value *>
+getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
+ Type *IndexType = IntegerType::get(V->getContext(),
+ DL.getPointerTypeSizeInBits(V->getType()));
+
+ Constant *Index = ConstantInt::getNullValue(IndexType);
+ while (true) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // We accept only inbouds GEPs here to exclude the possibility of
+ // overflow.
+ if (!GEP->isInBounds())
+ break;
+ if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
+ GEP->getType() == V->getType()) {
+ V = GEP->getOperand(0);
+ Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
+ Index = ConstantExpr::getAdd(
+ Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
+ continue;
+ }
+ break;
+ }
+ if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
+ if (!CI->isNoopCast(DL))
+ break;
+ V = CI->getOperand(0);
+ continue;
+ }
+ if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
+ if (!CI->isNoopCast(DL))
+ break;
+ V = CI->getOperand(0);
+ continue;
+ }
+ break;
+ }
+ return {V, Index};
+}
+
+/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
+/// We can look through PHIs, GEPs and casts in order to determine a common base
+/// between GEPLHS and RHS.
+static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ const DataLayout &DL) {
+ if (!GEPLHS->hasAllConstantIndices())
+ return nullptr;
+
+ Value *PtrBase, *Index;
+ std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
+
+ // The set of nodes that will take part in this transformation.
+ SetVector<Value *> Nodes;
+
+ if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
+ return nullptr;
+
+ // We know we can re-write this as
+ // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
+ // Since we've only looked through inbouds GEPs we know that we
+ // can't have overflow on either side. We can therefore re-write
+ // this as:
+ // OFFSET1 cmp OFFSET2
+ Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
+
+ // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
+ // GEP having PtrBase as the pointer base, and has returned in NewRHS the
+ // offset. Since Index is the offset of LHS to the base pointer, we will now
+ // compare the offsets instead of comparing the pointers.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
+}
+
+/// Fold comparisons between a GEP instruction and something else. At this point
+/// we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ // Don't transform signed compares of GEPs into index compares. Even if the
+ // GEP is inbounds, the final add of the base pointer can have signed overflow
+ // and would change the result of the icmp.
+ // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
+ // the maximum signed value for the pointer type.
+ if (ICmpInst::isSigned(Cond))
+ return nullptr;
+
+ // Look through bitcasts and addrspacecasts. We do not however want to remove
+ // 0 GEPs.
+ if (!isa<GetElementPtrInst>(RHS))
+ RHS = RHS->stripPointerCasts();
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (PtrBase == RHS && GEPLHS->isInBounds()) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
+
+ // If not, synthesize the offset the hard way.
+ if (!Offset)
+ Offset = EmitGEPOffset(GEPLHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+ // If we're comparing GEPs with two base pointers that only differ in type
+ // and both GEPs have only constant indices or just one use, then fold
+ // the compare with the adjusted indices.
+ if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
+ (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
+ PtrBase->stripPointerCasts() ==
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
+ Value *LOffset = EmitGEPOffset(GEPLHS);
+ Value *ROffset = EmitGEPOffset(GEPRHS);
+
+ // If we looked through an addrspacecast between different sized address
+ // spaces, the LHS and RHS pointers are different sized
+ // integers. Truncate to the smaller one.
+ Type *LHSIndexTy = LOffset->getType();
+ Type *RHSIndexTy = ROffset->getType();
+ if (LHSIndexTy != RHSIndexTy) {
+ if (LHSIndexTy->getPrimitiveSizeInBits() <
+ RHSIndexTy->getPrimitiveSizeInBits()) {
+ ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy);
+ } else
+ LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy);
+ }
+
+ Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
+ LOffset, ROffset);
+ return replaceInstUsesWith(I, Cmp);
+ }
+
+ // Otherwise, the base pointers are different and the indices are
+ // different. Try convert this to an indexed compare by looking through
+ // PHIs/casts.
+ return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ if (GEPLHS->hasAllZeroIndices())
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ if (GEPRHS->hasAllZeroIndices())
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return replaceInstUsesWith(I, // No comparison is needed here.
+ Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1 && GEPsInBounds) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+ // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS);
+ Value *R = EmitGEPOffset(GEPRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+
+ // Try convert this to an indexed compare by looking through PHIs/casts as a
+ // last resort.
+ return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
+}
+
+Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
+ Value *Other) {
+ assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
+
+ // It would be tempting to fold away comparisons between allocas and any
+ // pointer not based on that alloca (e.g. an argument). However, even
+ // though such pointers cannot alias, they can still compare equal.
+ //
+ // But LLVM doesn't specify where allocas get their memory, so if the alloca
+ // doesn't escape we can argue that it's impossible to guess its value, and we
+ // can therefore act as if any such guesses are wrong.
+ //
+ // The code below checks that the alloca doesn't escape, and that it's only
+ // used in a comparison once (the current instruction). The
+ // single-comparison-use condition ensures that we're trivially folding all
+ // comparisons against the alloca consistently, and avoids the risk of
+ // erroneously folding a comparison of the pointer with itself.
+
+ unsigned MaxIter = 32; // Break cycles and bound to constant-time.
+
+ SmallVector<Use *, 32> Worklist;
+ for (Use &U : Alloca->uses()) {
+ if (Worklist.size() >= MaxIter)
+ return nullptr;
+ Worklist.push_back(&U);
+ }
+
+ unsigned NumCmps = 0;
+ while (!Worklist.empty()) {
+ assert(Worklist.size() <= MaxIter);
+ Use *U = Worklist.pop_back_val();
+ Value *V = U->getUser();
+ --MaxIter;
+
+ if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
+ isa<SelectInst>(V)) {
+ // Track the uses.
+ } else if (isa<LoadInst>(V)) {
+ // Loading from the pointer doesn't escape it.
+ continue;
+ } else if (auto *SI = dyn_cast<StoreInst>(V)) {
+ // Storing *to* the pointer is fine, but storing the pointer escapes it.
+ if (SI->getValueOperand() == U->get())
+ return nullptr;
+ continue;
+ } else if (isa<ICmpInst>(V)) {
+ if (NumCmps++)
+ return nullptr; // Found more than one cmp.
+ continue;
+ } else if (auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
+ switch (Intrin->getIntrinsicID()) {
+ // These intrinsics don't escape or compare the pointer. Memset is safe
+ // because we don't allow ptrtoint. Memcpy and memmove are safe because
+ // we don't allow stores, so src cannot point to V.
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
+ continue;
+ default:
+ return nullptr;
+ }
+ } else {
+ return nullptr;
+ }
+ for (Use &U : V->uses()) {
+ if (Worklist.size() >= MaxIter)
+ return nullptr;
+ Worklist.push_back(&U);
+ }
+ }
+
+ Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
+ return replaceInstUsesWith(
+ ICI,
+ ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
+}
+
+/// Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred) {
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+ // so the values can never be equal. Similarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
+ // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ Value *R =
+ ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = Builder->getInt(CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS and CmpRHS are
+/// both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return nullptr;
+ if (DivRHS->isZero())
+ return nullptr; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return nullptr; // The overflow computation also screws up here
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
+
+ // Compute Prod = CI * DivRHS. We are essentially solving an equation
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ // If the division is known to be exact, then there is no remainder from the
+ // divide, so the covered range size is unit, otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis each
+ // overflow variable is set to 0 if it's corresponding bound variable is valid
+ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = nullptr, *HiBound = nullptr;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return replaceInstUsesWith(ICI, Builder->getFalse());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return replaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return replaceInstUsesWith(ICI, Builder->getTrue());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return replaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return replaceInstUsesWith(ICI, Builder->getTrue());
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return replaceInstUsesWith(ICI, Builder->getFalse());
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return replaceInstUsesWith(ICI, Builder->getFalse());
+ if (HiOverflow == -1) // High bound less than input range.
+ return replaceInstUsesWith(ICI, Builder->getTrue());
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return nullptr;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return nullptr;
+
+ // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+ // by a power of 2. Since we already have logic to simplify these,
+ // transform to div and then simplify the resultant comparison.
+ if (Shr->getOpcode() == Instruction::AShr &&
+ (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+ return nullptr;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (!TheDiv)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
+ return replaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = Builder->getInt(Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+ }
+ return nullptr;
+}
+
+/// Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" ->
+/// (icmp eq/ne A, Log2(const2/const1)) ->
+/// (icmp eq/ne A, Log2(const2) - Log2(const1)).
+Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1,
+ ConstantInt *CI2) {
+ assert(I.isEquality() && "Cannot fold icmp gt/lt");
+
+ auto getConstant = [&I, this](bool IsTrue) {
+ if (I.getPredicate() == I.ICMP_NE)
+ IsTrue = !IsTrue;
+ return replaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
+ };
+
+ auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
+ if (I.getPredicate() == I.ICMP_NE)
+ Pred = CmpInst::getInversePredicate(Pred);
+ return new ICmpInst(Pred, LHS, RHS);
+ };
+
+ const APInt &AP1 = CI1->getValue();
+ const APInt &AP2 = CI2->getValue();
+
+ // Don't bother doing any work for cases which InstSimplify handles.
+ if (AP2 == 0)
+ return nullptr;
+ bool IsAShr = isa<AShrOperator>(Op);
+ if (IsAShr) {
+ if (AP2.isAllOnesValue())
+ return nullptr;
+ if (AP2.isNegative() != AP1.isNegative())
+ return nullptr;
+ if (AP2.sgt(AP1))
+ return nullptr;
+ }
+
+ if (!AP1)
+ // 'A' must be large enough to shift out the highest set bit.
+ return getICmp(I.ICMP_UGT, A,
+ ConstantInt::get(A->getType(), AP2.logBase2()));
+
+ if (AP1 == AP2)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+
+ int Shift;
+ if (IsAShr && AP1.isNegative())
+ Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
+ else
+ Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
+
+ if (Shift > 0) {
+ if (IsAShr && AP1 == AP2.ashr(Shift)) {
+ // There are multiple solutions if we are comparing against -1 and the LHS
+ // of the ashr is not a power of two.
+ if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
+ return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ } else if (AP1 == AP2.lshr(Shift)) {
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ }
+ }
+ // Shifting const2 will never be equal to const1.
+ return getConstant(false);
+}
+
+/// Handle "(icmp eq/ne (shl const2, A), const1)" ->
+/// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)).
+Instruction *InstCombiner::FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1,
+ ConstantInt *CI2) {
+ assert(I.isEquality() && "Cannot fold icmp gt/lt");
+
+ auto getConstant = [&I, this](bool IsTrue) {
+ if (I.getPredicate() == I.ICMP_NE)
+ IsTrue = !IsTrue;
+ return replaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
+ };
+
+ auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
+ if (I.getPredicate() == I.ICMP_NE)
+ Pred = CmpInst::getInversePredicate(Pred);
+ return new ICmpInst(Pred, LHS, RHS);
+ };
+
+ const APInt &AP1 = CI1->getValue();
+ const APInt &AP2 = CI2->getValue();
+
+ // Don't bother doing any work for cases which InstSimplify handles.
+ if (AP2 == 0)
+ return nullptr;
+
+ unsigned AP2TrailingZeros = AP2.countTrailingZeros();
+
+ if (!AP1 && AP2TrailingZeros != 0)
+ return getICmp(I.ICMP_UGE, A,
+ ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
+
+ if (AP1 == AP2)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+
+ // Get the distance between the lowest bits that are set.
+ int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
+
+ if (Shift > 0 && AP2.shl(Shift) == AP1)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+
+ // Shifting const2 will never be equal to const1.
+ return getConstant(false);
+}
+
+/// Handle "icmp (instr, intcst)".
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (RHS->isOne() && RHSV.getBitWidth() > 1) {
+ // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
+ match(LHSI->getOperand(0), m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS = RHS->getValue().zext(SrcBits);
+ NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits);
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ Builder->getInt(NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCst), CI)
+ if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+ // fold the xor.
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCst is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCst->isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ Worklist.Add(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+ SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+ AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+ if (!ICI.isEquality() && XorCst->getValue().isSignBit()) {
+ const APInt &SignBit = XorCst->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ Builder->getInt(RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCst->isMaxValue(true)) {
+ const APInt &NotSignBit = XorCst->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ Builder->getInt(RHSV ^ NotSignBit));
+ }
+ }
+
+ // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
+ // iff -C is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
+ XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst);
+
+ // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
+ // iff -C is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT &&
+ XorCst->getValue() == -RHSV && RHSV.isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst);
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCst), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (ICI.isEquality() ||
+ (!AndCst->isNegative() && RHSV.isNonNegative())) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getZExt(AndCst, Cast->getSrcTy()));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
+ }
+ }
+
+ // If the LHS is an AND of a zext, and we have an equality compare, we can
+ // shrink the and/compare to the smaller type, eliminating the cast.
+ if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+ IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+ // Make sure we don't compare the upper bits, SimplifyDemandedBits
+ // should fold the icmp to true/false in that case.
+ if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getTrunc(AndCst, Ty));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getTrunc(RHS, Ty));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = nullptr;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr;
+
+ // This seemingly simple opportunity to fold away a shift turns out to
+ // be rather complicated. See PR17827
+ // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details.
+ if (ShAmt) {
+ bool CanFold = false;
+ unsigned ShiftOpcode = Shift->getOpcode();
+ if (ShiftOpcode == Instruction::AShr) {
+ // There may be some constraints that make this possible,
+ // but nothing simple has been discovered yet.
+ CanFold = false;
+ } else if (ShiftOpcode == Instruction::Shl) {
+ // For a left shift, we can fold if the comparison is not signed.
+ // We can also fold a signed comparison if the mask value and
+ // comparison value are not negative. These constraints may not be
+ // obvious, but we can prove that they are correct using an SMT
+ // solver.
+ if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative()))
+ CanFold = true;
+ } else if (ShiftOpcode == Instruction::LShr) {
+ // For a logical right shift, we can fold if the comparison is not
+ // signed. We can also fold a signed comparison if the shifted mask
+ // value and the shifted comparison value are not negative.
+ // These constraints may not be obvious, but we can prove that they
+ // are correct using an SMT solver.
+ if (!ICI.isSigned())
+ CanFold = true;
+ else {
+ ConstantInt *ShiftedAndCst =
+ cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt));
+ ConstantInt *ShiftedRHSCst =
+ cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt));
+
+ if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative())
+ CanFold = true;
+ }
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (ShiftOpcode == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return replaceInstUsesWith(ICI, Builder->getFalse());
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return replaceInstUsesWith(ICI, Builder->getTrue());
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCst;
+ if (ShiftOpcode == Instruction::Shl)
+ NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt);
+ else
+ NewAndCst = ConstantExpr::getShl(AndCst, ShAmt);
+ LHSI->setOperand(1, NewAndCst);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &ICI;
+ }
+ }
+ }
+
+ // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is
+ // preferable because it allows the C<<Y expression to be hoisted out
+ // of a loop if Y is invariant and X is not.
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = Builder->CreateShl(AndCst, Shift->getOperand(1));
+ } else {
+ // Insert a logical shift.
+ NS = Builder->CreateLShr(AndCst, Shift->getOperand(1));
+ }
+
+ // Compute X & (C << Y).
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+
+ // (icmp pred (and (or (lshr X, Y), X), 1), 0) -->
+ // (icmp pred (and X, (or (shl 1, Y), 1), 0))
+ //
+ // iff pred isn't signed
+ {
+ Value *X, *Y, *LShr;
+ if (!ICI.isSigned() && RHSV == 0) {
+ if (match(LHSI->getOperand(1), m_One())) {
+ Constant *One = cast<Constant>(LHSI->getOperand(1));
+ Value *Or = LHSI->getOperand(0);
+ if (match(Or, m_Or(m_Value(LShr), m_Value(X))) &&
+ match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) {
+ unsigned UsesRemoved = 0;
+ if (LHSI->hasOneUse())
+ ++UsesRemoved;
+ if (Or->hasOneUse())
+ ++UsesRemoved;
+ if (LShr->hasOneUse())
+ ++UsesRemoved;
+ Value *NewOr = nullptr;
+ // Compute X & ((1 << Y) | 1)
+ if (auto *C = dyn_cast<Constant>(Y)) {
+ if (UsesRemoved >= 1)
+ NewOr =
+ ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
+ } else {
+ if (UsesRemoved >= 3)
+ NewOr = Builder->CreateOr(Builder->CreateShl(One, Y,
+ LShr->getName(),
+ /*HasNUW=*/true),
+ One, Or->getName());
+ }
+ if (NewOr) {
+ Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName());
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+ }
+ }
+ }
+
+ // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any
+ // bit set in (X & AndCst) will produce a result greater than RHSV.
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
+ unsigned NTZ = AndCst->getValue().countTrailingZeros();
+ if ((NTZ < AndCst->getBitWidth()) &&
+ APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV))
+ return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+
+ // Try to optimize things like "A[i]&42 == 0" to index computations.
+ if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+ ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
+ return Res;
+ }
+ }
+
+ // X & -C == -C -> X > u ~C
+ // X & -C != -C -> X <= u ~C
+ // iff C is a power of 2
+ if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2())
+ return new ICmpInst(
+ ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT
+ : ICmpInst::ICMP_ULE,
+ LHSI->getOperand(0), SubOne(RHS));
+
+ // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
+ // iff C is a power of 2
+ if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) {
+ if (auto *CI = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ const APInt &AI = CI->getValue();
+ int32_t ExactLogBase2 = AI.exactLogBase2();
+ if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
+ Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1);
+ Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy);
+ return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ
+ ? ICmpInst::ICMP_SGE
+ : ICmpInst::ICMP_SLT,
+ Trunc, Constant::getNullValue(NTy));
+ }
+ }
+ }
+ break;
+
+ case Instruction::Or: {
+ if (RHS->isOne()) {
+ // icmp slt signum(V) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
+ match(LHSI, m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
+
+ if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+ break;
+ Value *P, *Q;
+ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+ Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+ Constant::getNullValue(P->getType()));
+ Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+ Constant::getNullValue(Q->getType()));
+ Instruction *Op;
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+ else
+ Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+ return Op;
+ }
+ break;
+ }
+
+ case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
+ ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!Val) break;
+
+ // If this is a signed comparison to 0 and the mul is sign preserving,
+ // use the mul LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) && !Val->isZero() &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(Val->isNegative() ?
+ ICmpInst::getSwappedPredicate(pred) : pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ break;
+ }
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ uint32_t TypeBits = RHSV.getBitWidth();
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) {
+ Value *X;
+ // (1 << X) pred P2 -> X pred Log2(P2)
+ if (match(LHSI, m_Shl(m_One(), m_Value(X)))) {
+ bool RHSVIsPowerOf2 = RHSV.isPowerOf2();
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+ if (ICI.isUnsigned()) {
+ if (!RHSVIsPowerOf2) {
+ // (1 << X) < 30 -> X <= 4
+ // (1 << X) <= 30 -> X <= 4
+ // (1 << X) >= 30 -> X > 4
+ // (1 << X) > 30 -> X > 4
+ if (Pred == ICmpInst::ICMP_ULT)
+ Pred = ICmpInst::ICMP_ULE;
+ else if (Pred == ICmpInst::ICMP_UGE)
+ Pred = ICmpInst::ICMP_UGT;
+ }
+ unsigned RHSLog2 = RHSV.logBase2();
+
+ // (1 << X) >= 2147483648 -> X >= 31 -> X == 31
+ // (1 << X) < 2147483648 -> X < 31 -> X != 31
+ if (RHSLog2 == TypeBits-1) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ Pred = ICmpInst::ICMP_EQ;
+ else if (Pred == ICmpInst::ICMP_ULT)
+ Pred = ICmpInst::ICMP_NE;
+ }
+
+ return new ICmpInst(Pred, X,
+ ConstantInt::get(RHS->getType(), RHSLog2));
+ } else if (ICI.isSigned()) {
+ if (RHSV.isAllOnesValue()) {
+ // (1 << X) <= -1 -> X == 31
+ if (Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+
+ // (1 << X) > -1 -> X != 31
+ if (Pred == ICmpInst::ICMP_SGT)
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+ } else if (!RHSV) {
+ // (1 << X) < 0 -> X == 31
+ // (1 << X) <= 0 -> X == 31
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+
+ // (1 << X) >= 0 -> X != 31
+ // (1 << X) > 0 -> X != 31
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+ }
+ } else if (ICI.isEquality()) {
+ if (RHSVIsPowerOf2)
+ return new ICmpInst(
+ Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2()));
+ }
+ }
+ break;
+ }
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
+ ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
+ return replaceInstUsesWith(ICI, Cst);
+ }
+
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits,
+ TypeBits - ShAmtVal));
+
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getLShr(RHS, ShAmt));
+ }
+ }
+
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+ APInt::getOneBitSet(TypeBits,
+ TypeBits-ShAmt->getZExtValue()-1));
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+
+ // Transform (icmp pred iM (shl iM %v, N), CI)
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
+ // This enables to get rid of the shift in favor of a trunc which can be
+ // free on the target. It has the additional benefit of comparing to a
+ // smaller constant, which will be target friendly.
+ unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
+ Constant *NCI = ConstantExpr::getTrunc(
+ ConstantExpr::getAShr(RHS,
+ ConstantInt::get(RHS->getType(), Amt)),
+ NTy);
+ return new ICmpInst(ICI.getPredicate(),
+ Builder->CreateTrunc(LHSI->getOperand(0), NTy),
+ NCI);
+ }
+
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Handle equality comparisons of shift-by-constant.
+ BinaryOperator *BO = cast<BinaryOperator>(LHSI);
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
+ return Res;
+ }
+
+ // Handle exact shr's.
+ if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
+ if (RHSV.isMinValue())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ case Instruction::UDiv:
+ if (ConstantInt *DivLHS = dyn_cast<ConstantInt>(LHSI->getOperand(0))) {
+ Value *X = LHSI->getOperand(1);
+ const APInt &C1 = RHS->getValue();
+ const APInt &C2 = DivLHS->getValue();
+ assert(C2 != 0 && "udiv 0, X should have been simplified already.");
+ // (icmp ugt (udiv C2, X), C1) -> (icmp ule X, C2/(C1+1))
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
+ assert(!C1.isMaxValue() &&
+ "icmp ugt X, UINT_MAX should have been simplified already.");
+ return new ICmpInst(ICmpInst::ICMP_ULE, X,
+ ConstantInt::get(X->getType(), C2.udiv(C1 + 1)));
+ }
+ // (icmp ult (udiv C2, X), C1) -> (icmp ugt X, C2/C1)
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT) {
+ assert(C1 != 0 && "icmp ult X, 0 should have been simplified already.");
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), C2.udiv(C1)));
+ }
+ }
+ // fall-through
+ case Instruction::SDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
+ // it, otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Sub: {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ // C1-X <u C2 -> (X|(C2-1)) == C1
+ // iff C1 & (C2-1) == C2-1
+ // C2 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
+ RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1))
+ return new ICmpInst(ICmpInst::ICMP_EQ,
+ Builder->CreateOr(LHSI->getOperand(1), RHSV - 1),
+ LHSC);
+
+ // C1-X >u C2 -> (X|C2) != C1
+ // iff C1 & C2 == C2
+ // C2+1 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
+ (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV)
+ return new ICmpInst(ICmpInst::ICMP_NE,
+ Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC);
+ break;
+ }
+
+ case Instruction::Add:
+ // Fold: icmp pred (add X, C1), C2
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSigned()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ Builder->getInt(CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ Builder->getInt(CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ Builder->getInt(CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ Builder->getInt(CR.getLower()));
+ }
+ }
+
+ // X-C1 <u C2 -> (X & -C2) == C1
+ // iff C1 & (C2-1) == 0
+ // C2 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
+ RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0)
+ return new ICmpInst(ICmpInst::ICMP_EQ,
+ Builder->CreateAnd(LHSI->getOperand(0), -RHSV),
+ ConstantExpr::getNeg(LHSC));
+
+ // X-C1 >u C2 -> (X & ~C2) != C1
+ // iff C1 & C2 == 0
+ // C2+1 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
+ (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0)
+ return new ICmpInst(ICmpInst::ICMP_NE,
+ Builder->CreateAnd(LHSI->getOperand(0), ~RHSV),
+ ConstantExpr::getNeg(LHSC));
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+ if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(1) && V.isPowerOf2()) {
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getSub(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ if (BO->hasOneUse()) {
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+ } else if (RHSV == 0) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ }
+ break;
+ case Instruction::Sub:
+ if (BO->hasOneUse()) {
+ if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+ // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+ ConstantExpr::getSub(BOp0C, RHS));
+ } else if (RHSV == 0) {
+ // Replace ((sub A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ }
+ break;
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return replaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
+
+ // Comparing if all bits outside of a constant mask are set?
+ // Replace (X | C) == -1 with (X & ~C) == ~C.
+ // This removes the -1 constant.
+ if (BO->hasOneUse() && RHS->isAllOnesValue()) {
+ Constant *NotBOC = ConstantExpr::getNot(BOC);
+ Value *And = Builder->CreateAnd(BO->getOperand(0), NotBOC);
+ return new ICmpInst(ICI.getPredicate(), And, NotBOC);
+ }
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return replaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+ // Don't perform the following transforms if the AND has multiple uses
+ if (!BO->hasOneUse())
+ break;
+
+ // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (RHSV == 0 && BO->hasNoSignedWrap()) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // The trivial case (mul X, 0) is handled by InstSimplify
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ if (!BOC->isZero())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
+ default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, Builder->getInt(RHSV.byteSwap()));
+ return &ICI;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(a) -> A == 0 and likewise for !=
+ if (RHSV == RHS->getType()->getBitWidth()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+ return &ICI;
+ }
+ break;
+ case Intrinsic::ctpop:
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ if (RHS->isZero()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, RHS);
+ return &ICI;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return nullptr;
+}
+
+/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
+/// far.
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICmp) {
+ const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ Type *SrcTy = LHSCIOp->getType();
+ Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ Value *RHSOp = nullptr;
+ if (auto *RHSC = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
+ Value *RHSCIOp = RHSC->getOperand(0);
+ if (RHSCIOp->getType()->getPointerAddressSpace() ==
+ LHSCIOp->getType()->getPointerAddressSpace()) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+ } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return nullptr;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICmp.isSigned();
+
+ if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return nullptr;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return nullptr;
+
+ // Deal with equality cases early.
+ if (ICmp.isEquality())
+ return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
+
+ // If we aren't dealing with a constant on the RHS, exit early.
+ auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
+ if (!C)
+ return nullptr;
+
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // re-extended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == C) {
+ // Deal with equality cases early.
+ if (ICmp.isEquality())
+ return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1);
+ }
+
+ // The re-extended constant changed, partly changed (in the case of a vector),
+ // or could not be determined to be equal (in the case of a constant
+ // expression), so the constant cannot be represented in the shorter type.
+ // Consequently, we cannot emit a simple comparison.
+ // All the cases that fold to true or false will have already been handled
+ // by SimplifyICmpInst, so only deal with the tricky case.
+
+ if (isSignedCmp || !isSignedExt || !isa<ConstantInt>(C))
+ return nullptr;
+
+ // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
+ // should have been folded away previously and not enter in here.
+
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
+
+ // Finally, return the value computed.
+ if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
+ return replaceInstUsesWith(ICmp, Result);
+
+ assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
+ return BinaryOperator::CreateNot(Result);
+}
+
+/// The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // The transformation we're trying to do here is to transform this into an
+ // llvm.sadd.with.overflow. To do this, we have to replace the original add
+ // with a narrower add, and discard the add-with-constant that is part of the
+ // range check (if we can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare can be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse()) return nullptr;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2()) return nullptr;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return nullptr;
+
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
+ IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
+ return nullptr;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (User *U : OrigAdd->users()) {
+ if (U == AddWithCst) continue;
+
+ // Only accept truncates for now. We would really like a nice recursive
+ // predicate like SimplifyDemandedBits, but which goes downwards the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(U);
+ if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
+ return nullptr;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::sadd_with_overflow, NewType);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+ CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.replaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
+
+bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
+ Value *RHS, Instruction &OrigI,
+ Value *&Result, Constant *&Overflow) {
+ if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
+ std::swap(LHS, RHS);
+
+ auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
+ Result = OpResult;
+ Overflow = OverflowVal;
+ if (ReuseName)
+ Result->takeName(&OrigI);
+ return true;
+ };
+
+ // If the overflow check was an add followed by a compare, the insertion point
+ // may be pointing to the compare. We want to insert the new instructions
+ // before the add in case there are uses of the add between the add and the
+ // compare.
+ Builder->SetInsertPoint(&OrigI);
+
+ switch (OCF) {
+ case OCF_INVALID:
+ llvm_unreachable("bad overflow check kind!");
+
+ case OCF_UNSIGNED_ADD: {
+ OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
+ true);
+
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+ }
+ // FALL THROUGH uadd into sadd
+ case OCF_SIGNED_ADD: {
+ // X + 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ // We can strength reduce this signed add into a regular add if we can prove
+ // that it will never overflow.
+ if (OCF == OCF_SIGNED_ADD)
+ if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ true);
+ break;
+ }
+
+ case OCF_UNSIGNED_SUB:
+ case OCF_SIGNED_SUB: {
+ // X - 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_SUB) {
+ if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ } else {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ }
+ break;
+ }
+
+ case OCF_UNSIGNED_MUL: {
+ OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
+ } // FALL THROUGH
+ case OCF_SIGNED_MUL:
+ // X * undef -> undef
+ if (isa<UndefValue>(RHS))
+ return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false);
+
+ // X * 0 -> {0, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(RHS, Builder->getFalse(), false);
+
+ // X * 1 -> {X, false}
+ if (match(RHS, m_One()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_MUL)
+ if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ break;
+ }
+
+ return false;
+}
+
+/// \brief Recognize and process idiom involving test for multiplication
+/// overflow.
+///
+/// The caller has matched a pattern of the form:
+/// I = cmp u (mul(zext A, zext B), V
+/// The function checks if this is a test for overflow and if so replaces
+/// multiplication with call to 'mul.with.overflow' intrinsic.
+///
+/// \param I Compare instruction.
+/// \param MulVal Result of 'mult' instruction. It is one of the arguments of
+/// the compare instruction. Must be of integer type.
+/// \param OtherVal The other argument of compare instruction.
+/// \returns Instruction which must replace the compare instruction, NULL if no
+/// replacement required.
+static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
+ Value *OtherVal, InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers, don't do it for
+ // vectors.
+ if (!isa<IntegerType>(MulVal->getType()))
+ return nullptr;
+
+ assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
+ assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
+ auto *MulInstr = dyn_cast<Instruction>(MulVal);
+ if (!MulInstr)
+ return nullptr;
+ assert(MulInstr->getOpcode() == Instruction::Mul);
+
+ auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
+ *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
+ assert(LHS->getOpcode() == Instruction::ZExt);
+ assert(RHS->getOpcode() == Instruction::ZExt);
+ Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
+
+ // Calculate type and width of the result produced by mul.with.overflow.
+ Type *TyA = A->getType(), *TyB = B->getType();
+ unsigned WidthA = TyA->getPrimitiveSizeInBits(),
+ WidthB = TyB->getPrimitiveSizeInBits();
+ unsigned MulWidth;
+ Type *MulType;
+ if (WidthB > WidthA) {
+ MulWidth = WidthB;
+ MulType = TyB;
+ } else {
+ MulWidth = WidthA;
+ MulType = TyA;
+ }
+
+ // In order to replace the original mul with a narrower mul.with.overflow,
+ // all uses must ignore upper bits of the product. The number of used low
+ // bits must be not greater than the width of mul.with.overflow.
+ if (MulVal->hasNUsesOrMore(2))
+ for (User *U : MulVal->users()) {
+ if (U == &I)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ // Check if truncation ignores bits above MulWidth.
+ unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
+ if (TruncWidth > MulWidth)
+ return nullptr;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ // Check if AND ignores bits above MulWidth.
+ if (BO->getOpcode() != Instruction::And)
+ return nullptr;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ const APInt &CVal = CI->getValue();
+ if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
+ return nullptr;
+ }
+ } else {
+ // Other uses prohibit this transformation.
+ return nullptr;
+ }
+ }
+
+ // Recognize patterns
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp eq/neq mulval, zext trunc mulval
+ if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
+ if (Zext->hasOneUse()) {
+ Value *ZextArg = Zext->getOperand(0);
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
+ if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
+ break; //Recognized
+ }
+
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
+ ConstantInt *CI;
+ Value *ValToMask;
+ if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
+ if (ValToMask != MulVal)
+ return nullptr;
+ const APInt &CVal = CI->getValue() + 1;
+ if (CVal.isPowerOf2()) {
+ unsigned MaskWidth = CVal.logBase2();
+ if (MaskWidth == MulWidth)
+ break; // Recognized
+ }
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_UGT:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ugt mulval, max
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(CI->getBitWidth());
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_UGE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp uge mulval, max+1
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_ULE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ule mulval, max
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(CI->getBitWidth());
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_ULT:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ule mulval, max + 1
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ default:
+ return nullptr;
+ }
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(MulInstr);
+
+ // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
+ Value *MulA = A, *MulB = B;
+ if (WidthA < MulWidth)
+ MulA = Builder->CreateZExt(A, MulType);
+ if (WidthB < MulWidth)
+ MulB = Builder->CreateZExt(B, MulType);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::umul_with_overflow, MulType);
+ CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
+ IC.Worklist.Add(MulInstr);
+
+ // If there are uses of mul result other than the comparison, we know that
+ // they are truncation or binary AND. Change them to use result of
+ // mul.with.overflow and adjust properly mask/size.
+ if (MulVal->hasNUsesOrMore(2)) {
+ Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value");
+ for (User *U : MulVal->users()) {
+ if (U == &I || U == OtherVal)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
+ IC.replaceInstUsesWith(*TI, Mul);
+ else
+ TI->setOperand(0, Mul);
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ assert(BO->getOpcode() == Instruction::And);
+ // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+ APInt ShortMask = CI->getValue().trunc(MulWidth);
+ Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask);
+ Instruction *Zext =
+ cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType()));
+ IC.Worklist.Add(Zext);
+ IC.replaceInstUsesWith(*BO, Zext);
+ } else {
+ llvm_unreachable("Unexpected Binary operation");
+ }
+ IC.Worklist.Add(cast<Instruction>(U));
+ }
+ }
+ if (isa<Instruction>(OtherVal))
+ IC.Worklist.Add(cast<Instruction>(OtherVal));
+
+ // The original icmp gets replaced with the overflow value, maybe inverted
+ // depending on predicate.
+ bool Inverse = false;
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_EQ:
+ Inverse = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (I.getOperand(0) == MulVal)
+ break;
+ Inverse = true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (I.getOperand(1) == MulVal)
+ break;
+ Inverse = true;
+ break;
+ default:
+ llvm_unreachable("Unexpected predicate");
+ }
+ if (Inverse) {
+ Value *Res = Builder->CreateExtractValue(Call, 1);
+ return BinaryOperator::CreateNot(Res);
+ }
+
+ return ExtractValueInst::Create(Call, 1);
+}
+
+/// When performing a comparison against a constant, it is possible that not all
+/// the bits in the LHS are demanded. This helper method computes the mask that
+/// IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+ unsigned BitWidth, bool isSignCheck) {
+ if (isSignCheck)
+ return APInt::getSignBit(BitWidth);
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!CI) return APInt::getAllOnesValue(BitWidth);
+ const APInt &RHS = CI->getValue();
+
+ switch (I.getPredicate()) {
+ // For a UGT comparison, we don't care about any bits that
+ // correspond to the trailing ones of the comparand. The value of these
+ // bits doesn't impact the outcome of the comparison, because any value
+ // greater than the RHS must differ in a bit higher than these due to carry.
+ case ICmpInst::ICMP_UGT: {
+ unsigned trailingOnes = RHS.countTrailingOnes();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+ return ~lowBitsSet;
+ }
+
+ // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+ // Any value less than the RHS must differ in a higher bit because of carries.
+ case ICmpInst::ICMP_ULT: {
+ unsigned trailingZeros = RHS.countTrailingZeros();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+ return ~lowBitsSet;
+ }
+
+ default:
+ return APInt::getAllOnesValue(BitWidth);
+ }
+}
+
+/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
+/// should be swapped.
+/// The decision is based on how many times these two operands are reused
+/// as subtract operands and their positions in those instructions.
+/// The rational is that several architectures use the same instruction for
+/// both subtract and cmp, thus it is better if the order of those operands
+/// match.
+/// \return true if Op0 and Op1 should be swapped.
+static bool swapMayExposeCSEOpportunities(const Value * Op0,
+ const Value * Op1) {
+ // Filter out pointer value as those cannot appears directly in subtract.
+ // FIXME: we may want to go through inttoptrs or bitcasts.
+ if (Op0->getType()->isPointerTy())
+ return false;
+ // Count every uses of both Op0 and Op1 in a subtract.
+ // Each time Op0 is the first operand, count -1: swapping is bad, the
+ // subtract has already the same layout as the compare.
+ // Each time Op0 is the second operand, count +1: swapping is good, the
+ // subtract has a different layout as the compare.
+ // At the end, if the benefit is greater than 0, Op0 should come second to
+ // expose more CSE opportunities.
+ int GlobalSwapBenefits = 0;
+ for (const User *U : Op0->users()) {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
+ continue;
+ // If Op0 is the first argument, this is not beneficial to swap the
+ // arguments.
+ int LocalSwapBenefits = -1;
+ unsigned Op1Idx = 1;
+ if (BinOp->getOperand(Op1Idx) == Op0) {
+ Op1Idx = 0;
+ LocalSwapBenefits = 1;
+ }
+ if (BinOp->getOperand(Op1Idx) != Op1)
+ continue;
+ GlobalSwapBenefits += LocalSwapBenefits;
+ }
+ return GlobalSwapBenefits > 0;
+}
+
+/// \brief Check that one use is in the same block as the definition and all
+/// other uses are in blocks dominated by a given block
+///
+/// \param DI Definition
+/// \param UI Use
+/// \param DB Block that must dominate all uses of \p DI outside
+/// the parent block
+/// \return true when \p UI is the only use of \p DI in the parent block
+/// and all other uses of \p DI are in blocks dominated by \p DB.
+///
+bool InstCombiner::dominatesAllUses(const Instruction *DI,
+ const Instruction *UI,
+ const BasicBlock *DB) const {
+ assert(DI && UI && "Instruction not defined\n");
+ // ignore incomplete definitions
+ if (!DI->getParent())
+ return false;
+ // DI and UI must be in the same block
+ if (DI->getParent() != UI->getParent())
+ return false;
+ // Protect from self-referencing blocks
+ if (DI->getParent() == DB)
+ return false;
+ // DominatorTree available?
+ if (!DT)
+ return false;
+ for (const User *U : DI->users()) {
+ auto *Usr = cast<Instruction>(U);
+ if (Usr != UI && !DT->dominates(DB, Usr->getParent()))
+ return false;
+ }
+ return true;
+}
+
+/// Return true when the instruction sequence within a block is select-cmp-br.
+static bool isChainSelectCmpBranch(const SelectInst *SI) {
+ const BasicBlock *BB = SI->getParent();
+ if (!BB)
+ return false;
+ auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
+ if (!BI || BI->getNumSuccessors() != 2)
+ return false;
+ auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
+ return false;
+ return true;
+}
+
+/// \brief True when a select result is replaced by one of its operands
+/// in select-icmp sequence. This will eventually result in the elimination
+/// of the select.
+///
+/// \param SI Select instruction
+/// \param Icmp Compare instruction
+/// \param SIOpd Operand that replaces the select
+///
+/// Notes:
+/// - The replacement is global and requires dominator information
+/// - The caller is responsible for the actual replacement
+///
+/// Example:
+///
+/// entry:
+/// %4 = select i1 %3, %C* %0, %C* null
+/// %5 = icmp eq %C* %4, null
+/// br i1 %5, label %9, label %7
+/// ...
+/// ; <label>:7 ; preds = %entry
+/// %8 = getelementptr inbounds %C* %4, i64 0, i32 0
+/// ...
+///
+/// can be transformed to
+///
+/// %5 = icmp eq %C* %0, null
+/// %6 = select i1 %3, i1 %5, i1 true
+/// br i1 %6, label %9, label %7
+/// ...
+/// ; <label>:7 ; preds = %entry
+/// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0!
+///
+/// Similar when the first operand of the select is a constant or/and
+/// the compare is for not equal rather than equal.
+///
+/// NOTE: The function is only called when the select and compare constants
+/// are equal, the optimization can work only for EQ predicates. This is not a
+/// major restriction since a NE compare should be 'normalized' to an equal
+/// compare, which usually happens in the combiner and test case
+/// select-cmp-br.ll
+/// checks for it.
+bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
+ const ICmpInst *Icmp,
+ const unsigned SIOpd) {
+ assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
+ if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
+ BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
+ // The check for the unique predecessor is not the best that can be
+ // done. But it protects efficiently against cases like when SI's
+ // home block has two successors, Succ and Succ1, and Succ1 predecessor
+ // of Succ. Then SI can't be replaced by SIOpd because the use that gets
+ // replaced can be reached on either path. So the uniqueness check
+ // guarantees that the path all uses of SI (outside SI's parent) are on
+ // is disjoint from all other paths out of SI. But that information
+ // is more expensive to compute, and the trade-off here is in favor
+ // of compile-time.
+ if (Succ->getUniquePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
+ NumSel++;
+ SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
+ return true;
+ }
+ }
+ return false;
+}
+
+/// If we have an icmp le or icmp ge instruction with a constant operand, turn
+/// it into the appropriate icmp lt or icmp gt instruction. This transform
+/// allows them to be folded in visitICmpInst.
+static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
+ ICmpInst::Predicate Pred = I.getPredicate();
+ if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE &&
+ Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE)
+ return nullptr;
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ auto *Op1C = dyn_cast<Constant>(Op1);
+ if (!Op1C)
+ return nullptr;
+
+ // Check if the constant operand can be safely incremented/decremented without
+ // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled
+ // the edge cases for us, so we just assert on them. For vectors, we must
+ // handle the edge cases.
+ Type *Op1Type = Op1->getType();
+ bool IsSigned = I.isSigned();
+ bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE);
+ auto *CI = dyn_cast<ConstantInt>(Op1C);
+ if (CI) {
+ // A <= MAX -> TRUE ; A >= MIN -> TRUE
+ assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned));
+ } else if (Op1Type->isVectorTy()) {
+ // TODO? If the edge cases for vectors were guaranteed to be handled as they
+ // are for scalar, we could remove the min/max checks. However, to do that,
+ // we would have to use insertelement/shufflevector to replace edge values.
+ unsigned NumElts = Op1Type->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = Op1C->getAggregateElement(i);
+ if (!Elt)
+ return nullptr;
+
+ if (isa<UndefValue>(Elt))
+ continue;
+ // Bail out if we can't determine if this constant is min/max or if we
+ // know that this constant is min/max.
+ auto *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned)))
+ return nullptr;
+ }
+ } else {
+ // ConstantExpr?
+ return nullptr;
+ }
+
+ // Increment or decrement the constant and set the new comparison predicate:
+ // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT
+ Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true);
+ CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT: ICmpInst::ICMP_UGT;
+ NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred;
+ return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
+}
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ unsigned Op0Cplxity = getComplexity(Op0);
+ unsigned Op1Cplxity = getComplexity(Op1);
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (Op0Cplxity < Op1Cplxity ||
+ (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ Changed = true;
+ }
+
+ if (Value *V =
+ SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I))
+ return replaceInstUsesWith(I, V);
+
+ // comparing -val or val with non-zero is the same as just comparing val
+ // ie, abs(val) != 0 -> val != 0
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
+ Type *Ty = Op0->getType();
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty->getScalarType()->isIntegerTy(1)) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp");
+ return BinaryOperator::CreateNot(Xor);
+ }
+ case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
+ Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
+
+ if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
+ return NewICmp;
+
+ unsigned BitWidth = 0;
+ if (Ty->isIntOrIntVectorTy())
+ BitWidth = Ty->getScalarSizeInBits();
+ else // Get pointer size.
+ BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = nullptr, *B = nullptr;
+
+ // Match the following pattern, which is a common idiom when writing
+ // overflow-safe integer arithmetic function. The source performs an
+ // addition in wider type, and explicitly checks for overflow using
+ // comparisons against INT_MIN and INT_MAX. Simplify this by using the
+ // sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate
+ // magic constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+ return Res;
+ }
+
+ // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+ if (CI->isZero() && I.getPredicate() == ICmpInst::ICMP_SGT)
+ if (auto *SI = dyn_cast<SelectInst>(Op0)) {
+ SelectPatternResult SPR = matchSelectPattern(SI, A, B);
+ if (SPR.Flavor == SPF_SMIN) {
+ if (isKnownPositive(A, DL))
+ return new ICmpInst(I.getPredicate(), B, CI);
+ if (isKnownPositive(B, DL))
+ return new ICmpInst(I.getPredicate(), A, CI);
+ }
+ }
+
+
+ // The following transforms are only 'worth it' if the only user of the
+ // subtraction is the icmp.
+ if (Op0->hasOneUse()) {
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), A, B);
+
+ // (icmp sgt (sub nsw A B), -1) -> (icmp sge A, B)
+ if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isAllOnesValue() &&
+ match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+ return new ICmpInst(ICmpInst::ICMP_SGE, A, B);
+
+ // (icmp sgt (sub nsw A B), 0) -> (icmp sgt A, B)
+ if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isZero() &&
+ match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+ return new ICmpInst(ICmpInst::ICMP_SGT, A, B);
+
+ // (icmp slt (sub nsw A B), 0) -> (icmp slt A, B)
+ if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isZero() &&
+ match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, A, B);
+
+ // (icmp slt (sub nsw A B), 1) -> (icmp sle A, B)
+ if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isOne() &&
+ match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+ return new ICmpInst(ICmpInst::ICMP_SLE, A, B);
+ }
+
+ if (I.isEquality()) {
+ ConstantInt *CI2;
+ if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) ||
+ match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) {
+ // (icmp eq/ne (ashr/lshr const2, A), const1)
+ if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2))
+ return Inst;
+ }
+ if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) {
+ // (icmp eq/ne (shl const2, A), const1)
+ if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2))
+ return Inst;
+ }
+ }
+
+ // If this comparison is a normal comparison, it demands all
+ // bits, if it is a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+
+ // Canonicalize icmp instructions based on dominating conditions.
+ BasicBlock *Parent = I.getParent();
+ BasicBlock *Dom = Parent->getSinglePredecessor();
+ auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
+ ICmpInst::Predicate Pred;
+ BasicBlock *TrueBB, *FalseBB;
+ ConstantInt *CI2;
+ if (BI && match(BI, m_Br(m_ICmp(Pred, m_Specific(Op0), m_ConstantInt(CI2)),
+ TrueBB, FalseBB)) &&
+ TrueBB != FalseBB) {
+ ConstantRange CR = ConstantRange::makeAllowedICmpRegion(I.getPredicate(),
+ CI->getValue());
+ ConstantRange DominatingCR =
+ (Parent == TrueBB)
+ ? ConstantRange::makeExactICmpRegion(Pred, CI2->getValue())
+ : ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(Pred), CI2->getValue());
+ ConstantRange Intersection = DominatingCR.intersectWith(CR);
+ ConstantRange Difference = DominatingCR.difference(CR);
+ if (Intersection.isEmptySet())
+ return replaceInstUsesWith(I, Builder->getFalse());
+ if (Difference.isEmptySet())
+ return replaceInstUsesWith(I, Builder->getTrue());
+ // Canonicalizing a sign bit comparison that gets used in a branch,
+ // pessimizes codegen by generating branch on zero instruction instead
+ // of a test and branch. So we avoid canonicalizing in such situations
+ // because test and branch instruction has better branch displacement
+ // than compare and branch instruction.
+ if (!isBranchOnSignBitCheck(I, isSignBit) && !I.isEquality()) {
+ if (auto *AI = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Builder->getInt(*AI));
+ if (auto *AD = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Builder->getInt(*AD));
+ }
+ }
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ DemandedBitsLHSMask(I, BitWidth, isSignBit),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+ Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(),
+ ConstantInt::get(Op0->getType(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(Op1->getType(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = nullptr;
+ ConstantInt *LHSC = nullptr;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) == 0" into "x != 3".
+ // or turn "((1 << x)&7) == 0" into "x > 2".
+ Value *X = nullptr;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ APInt ValToCheck = Op0KnownZeroInverted;
+ if (ValToCheck.isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ } else if ((++ValToCheck).isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros() - 1;
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) == 0" into "x != 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = nullptr;
+ ConstantInt *LHSC = nullptr;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) != 0" into "x == 3".
+ // or turn "((1 << x)&7) != 0" into "x < 3".
+ Value *X = nullptr;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ APInt ValToCheck = Op0KnownZeroInverted;
+ if (ValToCheck.isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ } else if ((++ValToCheck).isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_ULT, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) != 0" into "x == 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Builder->getInt(CI->getValue()-1));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ Constant::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Builder->getInt(CI->getValue()+1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Builder->getInt(CI->getValue()-1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Builder->getInt(CI->getValue()+1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = nullptr, *B = nullptr;
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+
+ // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_UDiv(m_Value(A), m_Value(B)))) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_EQ
+ ? ICmpInst::ICMP_UGT
+ : ICmpInst::ICMP_ULE;
+ return new ICmpInst(Pred, B, A);
+ }
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = nullptr, *Op2 = nullptr;
+ ConstantInt *CI = nullptr;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ CI = dyn_cast<ConstantInt>(Op1);
+ }
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ CI = dyn_cast<ConstantInt>(Op2);
+ }
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp) or all uses of the select can be replaced based on
+ // dominance information ("Global cases").
+ bool Transform = false;
+ if (Op1 && Op2)
+ Transform = true;
+ else if (Op1 || Op2) {
+ // Local case
+ if (LHSI->hasOneUse())
+ Transform = true;
+ // Global cases
+ else if (CI && !CI->isZero())
+ // When Op1 is constant try replacing select with second operand.
+ // Otherwise Op2 is constant and try replacing select with first
+ // operand.
+ Transform = replacedSelectWithOperand(cast<SelectInst>(LHSI), &I,
+ Op1 ? 2 : 1);
+ }
+ if (Transform) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Try to optimize equality comparisons against alloca-based pointers.
+ if (Op0->getType()->isPointerTy() && I.isEquality()) {
+ assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
+ if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
+ if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
+ return New;
+ if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
+ if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
+ return New;
+ }
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (BO0 || BO1) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ if (BO0 && BO0->getOpcode() == Instruction::Add) {
+ A = BO0->getOperand(0);
+ B = BO0->getOperand(1);
+ }
+ if (BO1 && BO1->getOpcode() == Instruction::Add) {
+ C = BO1->getOperand(0);
+ D = BO1->getOperand(1);
+ }
+
+ // icmp (X+cst) < 0 --> X < -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero()))
+ if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC));
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
+ return new ICmpInst(Pred, Y, Z);
+ }
+
+ // icmp slt (X + -1), Y -> icmp sle X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+ // icmp sge (X + -1), Y -> icmp sgt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+ // icmp sle (X + 1), Y -> icmp slt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+ // icmp sgt (X + 1), Y -> icmp sge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+ // icmp sgt X, (Y + -1) -> icmp sge X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+
+ // icmp sle X, (Y + -1) -> icmp slt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+
+ // icmp sge X, (Y + 1) -> icmp sgt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+
+ // icmp slt X, (Y + 1) -> icmp sle X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+
+ // if C1 has greater magnitude than C2:
+ // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // s.t. C3 = C1 - C2
+ //
+ // if C2 has greater magnitude than C1:
+ // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // s.t. C3 = C2 - C1
+ if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+ const APInt &AP1 = C1->getValue();
+ const APInt &AP2 = C2->getValue();
+ if (AP1.isNegative() == AP2.isNegative()) {
+ APInt AP1Abs = C1->getValue().abs();
+ APInt AP2Abs = C2->getValue().abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+ Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ return new ICmpInst(Pred, NewAdd, C);
+ } else {
+ ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+ Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ return new ICmpInst(Pred, A, NewAdd);
+ }
+ }
+ }
+
+
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = nullptr;
+ B = nullptr;
+ C = nullptr;
+ D = nullptr;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub) {
+ A = BO0->getOperand(0);
+ B = BO0->getOperand(1);
+ }
+ if (BO1 && BO1->getOpcode() == Instruction::Sub) {
+ C = BO1->getOperand(0);
+ D = BO1->getOperand(1);
+ }
+
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ // icmp (0-X) < cst --> x > -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
+ Value *X;
+ if (match(BO0, m_Neg(m_Value(X))))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(I.getSwappedPredicate(), X,
+ ConstantExpr::getNeg(RHSC));
+ }
+
+ BinaryOperator *SRem = nullptr;
+ // icmp (srem X, Y), Y
+ if (BO0 && BO0->getOpcode() == Instruction::SRem &&
+ Op1 == BO0->getOperand(1))
+ SRem = BO0;
+ // icmp Y, (srem X, Y)
+ else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+ Op0 == BO1->getOperand(1))
+ SRem = BO1;
+ if (SRem) {
+ // We don't check hasOneUse to avoid increasing register pressure because
+ // the value we use is the same value this instruction was already using.
+ switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case ICmpInst::ICMP_NE:
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+ Constant::getAllOnesValue(SRem->getType()));
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+ Constant::getNullValue(SRem->getType()));
+ }
+ }
+
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+ BO0->hasOneUse() && BO1->hasOneUse() &&
+ BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+
+ if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (I.isSigned())
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!BO0->isExact() || !BO1->isExact())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ case Instruction::Shl: {
+ bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+ bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && I.isSigned())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ }
+
+ if (BO0) {
+ // Transform A & (L - 1) `ult` L --> L != 0
+ auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
+ auto BitwiseAnd =
+ m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+
+ if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
+ auto *Zero = Constant::getNullValue(BO0->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
+ }
+ }
+ }
+
+ { Value *A, *B;
+ // Transform (A & ~B) == 0 --> (A & B) != 0
+ // and (A & ~B) != 0 --> (A & B) == 0
+ // if A is a power of 2.
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Zero()) &&
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
+ return new ICmpInst(I.getInversePredicate(),
+ Builder->CreateAnd(A, B),
+ Op1);
+
+ // ~x < ~y --> y < x
+ // ~x < cst --> ~cst < x
+ if (match(Op0, m_Not(m_Value(A)))) {
+ if (match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+ }
+
+ Instruction *AddI = nullptr;
+ if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ m_Instruction(AddI))) &&
+ isa<IntegerType>(A->getType())) {
+ Value *Result;
+ Constant *Overflow;
+ if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+ Overflow)) {
+ replaceInstUsesWith(*AddI, Result);
+ return replaceInstUsesWith(I, Overflow);
+ }
+ }
+
+ // (zext a) * (zext b) --> llvm.umul.with.overflow.
+ if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
+ if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this))
+ return R;
+ }
+ if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
+ if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this))
+ return R;
+ }
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC);
+ return new ICmpInst(I.getPredicate(), A, Xor);
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y);
+ Op1 = Builder->CreateAnd(Op1, Z);
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+
+ // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
+ // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
+ ConstantInt *Cst1;
+ if ((Op0->hasOneUse() &&
+ match(Op0, m_ZExt(m_Value(A))) &&
+ match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
+ (Op1->hasOneUse() &&
+ match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
+ match(Op1, m_ZExt(m_Value(A))))) {
+ APInt Pow2 = Cst1->getValue() + 1;
+ if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
+ Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
+ return new ICmpInst(I.getPredicate(), A,
+ Builder->CreateTrunc(B, A->getType()));
+ }
+
+ // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
+ // For lshr and ashr pairs.
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
+ ? ICmpInst::ICMP_UGE
+ : ICmpInst::ICMP_ULT;
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
+ return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ }
+ }
+
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
+
+ // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+ // "icmp (and X, mask), cst"
+ uint64_t ShAmt = 0;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
+ m_ConstantInt(ShAmt))))) &&
+ match(Op1, m_ConstantInt(Cst1)) &&
+ // Only do this when A has multiple uses. This is most important to do
+ // when it exposes other optimizations.
+ !A->hasOneUse()) {
+ unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+
+ if (ShAmt < ASize) {
+ APInt MaskV =
+ APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+ MaskV <<= ShAmt;
+
+ APInt CmpV = Cst1->getValue().zext(ASize);
+ CmpV <<= ShAmt;
+
+ Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+ return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ }
+ }
+ }
+
+ // The 'cmpxchg' instruction returns an aggregate containing the old value and
+ // an i1 which indicates whether or not we successfully did the swap.
+ //
+ // Replace comparisons between the old value and the expected value with the
+ // indicator that 'cmpxchg' returns.
+ //
+ // N.B. This transform is only valid when the 'cmpxchg' is not permitted to
+ // spuriously fail. In those cases, the old value may equal the expected
+ // value but it is possible for the swap to not occur.
+ if (I.getPredicate() == ICmpInst::ICMP_EQ)
+ if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
+ if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
+ if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
+ !ACXI->isWeak())
+ return ExtractValueInst::Create(ACXI, 1);
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
+ }
+ return Changed ? &I : nullptr;
+}
+
+/// Fold fcmp ([us]itofp x, cst) if possible.
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return nullptr;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return nullptr; // Unknown.
+
+ IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+
+ if (I.isEquality()) {
+ FCmpInst::Predicate P = I.getPredicate();
+ bool IsExact = false;
+ APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
+ RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
+
+ // If the floating point constant isn't an integer value, we know if we will
+ // ever compare equal / not equal to it.
+ if (!IsExact) {
+ // TODO: Can never be -0.0 and other non-representable values
+ APFloat RHSRoundInt(RHS);
+ RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
+ if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
+ return replaceInstUsesWith(I, Builder->getFalse());
+
+ assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
+ return replaceInstUsesWith(I, Builder->getTrue());
+ }
+ }
+
+ // TODO: If the constant is exactly representable, is it always OK to do
+ // equality compares as integer?
+ }
+
+ // Check to see that the input is converted from an integer type that is small
+ // enough that preserves all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+ unsigned InputSize = IntTy->getScalarSizeInBits();
+
+ // Following test does NOT adjust InputSize downwards for signed inputs,
+ // because the most negative value still requires all the mantissa bits
+ // to distinguish it from one less than that value.
+ if ((int)InputSize > MantissaWidth) {
+ // Conversion would lose accuracy. Check if loss can impact comparison.
+ int Exp = ilogb(RHS);
+ if (Exp == APFloat::IEK_Inf) {
+ int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
+ if (MaxExponent < (int)InputSize - !LHSUnsigned)
+ // Conversion could create infinity.
+ return nullptr;
+ } else {
+ // Note that if RHS is zero or NaN, then Exp is negative
+ // and first condition is trivially false.
+ if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
+ // Conversion could affect comparison.
+ return nullptr;
+ }
+ }
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return replaceInstUsesWith(I, Builder->getTrue());
+ case FCmpInst::FCMP_UNO:
+ return replaceInstUsesWith(I, Builder->getFalse());
+ }
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics());
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder->getFalse());
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics());
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder->getFalse());
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics());
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder->getFalse());
+ }
+ } else {
+ // See if the RHS value is < UnsignedMin.
+ APFloat SMin(RHS.getSemantics());
+ SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
+ Pred == ICmpInst::ICMP_UGE)
+ return replaceInstUsesWith(I, Builder->getTrue());
+ return replaceInstUsesWith(I, Builder->getFalse());
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return replaceInstUsesWith(I, Builder->getTrue());
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return replaceInstUsesWith(I, Builder->getFalse());
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return replaceInstUsesWith(I, Builder->getFalse());
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return replaceInstUsesWith(I, Builder->getFalse());
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return replaceInstUsesWith(I, Builder->getTrue());
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (RHS.isNegative())
+ return replaceInstUsesWith(I, Builder->getTrue());
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1,
+ I.getFastMathFlags(), DL, TLI, DT, AC, &I))
+ return replaceInstUsesWith(I, V);
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown predicate!");
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
+
+ // Test if the FCmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::FPExt: {
+ // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
+ FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
+ ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
+ if (!RHSF)
+ break;
+
+ const fltSemantics *Sem;
+ // FIXME: This shouldn't be here.
+ if (LHSExt->getSrcTy()->isHalfTy())
+ Sem = &APFloat::IEEEhalf;
+ else if (LHSExt->getSrcTy()->isFloatTy())
+ Sem = &APFloat::IEEEsingle;
+ else if (LHSExt->getSrcTy()->isDoubleTy())
+ Sem = &APFloat::IEEEdouble;
+ else if (LHSExt->getSrcTy()->isFP128Ty())
+ Sem = &APFloat::IEEEquad;
+ else if (LHSExt->getSrcTy()->isX86_FP80Ty())
+ Sem = &APFloat::x87DoubleExtended;
+ else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
+ Sem = &APFloat::PPCDoubleDouble;
+ else
+ break;
+
+ bool Lossy;
+ APFloat F = RHSF->getValueAPF();
+ F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals. Zero is a special case
+ // that's OK to convert.
+ APFloat Fabs = F;
+ Fabs.clearSign();
+ if (!Lossy &&
+ ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero()))
+
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ ConstantFP::get(RHSC->getContext(), F));
+ break;
+ }
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::FSub: {
+ // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
+ Value *Op;
+ if (match(LHSI, m_FNeg(m_Value(Op))))
+ return new FCmpInst(I.getSwappedPredicate(), Op,
+ ConstantExpr::getFNeg(RHSC));
+ break;
+ }
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ case Instruction::Call: {
+ if (!RHSC->isNullValue())
+ break;
+
+ CallInst *CI = cast<CallInst>(LHSI);
+ Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
+ if (IID != Intrinsic::fabs)
+ break;
+
+ // Various optimization for fabs compared with zero.
+ switch (I.getPredicate()) {
+ default:
+ break;
+ // fabs(x) < 0 --> false
+ case FCmpInst::FCMP_OLT:
+ llvm_unreachable("handled by SimplifyFCmpInst");
+ // fabs(x) > 0 --> x != 0
+ case FCmpInst::FCMP_OGT:
+ return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
+ // fabs(x) <= 0 --> x == 0
+ case FCmpInst::FCMP_OLE:
+ return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
+ // fabs(x) >= 0 --> !isnan(x)
+ case FCmpInst::FCMP_OGE:
+ return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
+ // fabs(x) == 0 --> x == 0
+ // fabs(x) != 0 --> x != 0
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), RHSC);
+ }
+ }
+ }
+ }
+
+ // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y);
+
+ // fcmp (fpext x), (fpext y) -> fcmp x, y
+ if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
+ if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
+ if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ RHSExt->getOperand(0));
+
+ return Changed ? &I : nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
new file mode 100644
index 000000000000..aa421ff594fb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -0,0 +1,582 @@
+//===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides internal interfaces used to implement the InstCombine.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+
+#define DEBUG_TYPE "instcombine"
+
+namespace llvm {
+class CallSite;
+class DataLayout;
+class DominatorTree;
+class TargetLibraryInfo;
+class DbgDeclareInst;
+class MemIntrinsic;
+class MemSetInst;
+
+/// \brief Assign a complexity or rank value to LLVM Values.
+///
+/// This routine maps IR values to various complexity ranks:
+/// 0 -> undef
+/// 1 -> Constants
+/// 2 -> Other non-instructions
+/// 3 -> Arguments
+/// 3 -> Unary operations
+/// 4 -> Other instructions
+static inline unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V))
+ return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+/// \brief Add one to a Constant
+static inline Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// \brief Subtract one from a Constant
+static inline Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+/// \brief Return true if the specified value is free to invert (apply ~ to).
+/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
+/// is true, work under the assumption that the caller intends to remove all
+/// uses of V and only keep uses of ~V.
+///
+static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if all of their uses are being modified to use the
+ // ~V.
+ if (isa<CmpInst>(V))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `A + Constant` then `-1 - V` can be folded into `(-1
+ // - Constant) - A` if we are willing to invert all of the uses.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub)
+ if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
+ return WillInvertAllUses;
+
+ return false;
+}
+
+
+/// \brief Specific patterns of overflow check idioms that we match.
+enum OverflowCheckFlavor {
+ OCF_UNSIGNED_ADD,
+ OCF_SIGNED_ADD,
+ OCF_UNSIGNED_SUB,
+ OCF_SIGNED_SUB,
+ OCF_UNSIGNED_MUL,
+ OCF_SIGNED_MUL,
+
+ OCF_INVALID
+};
+
+/// \brief Returns the OverflowCheckFlavor corresponding to a overflow_with_op
+/// intrinsic.
+static inline OverflowCheckFlavor
+IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
+ switch (ID) {
+ default:
+ return OCF_INVALID;
+ case Intrinsic::uadd_with_overflow:
+ return OCF_UNSIGNED_ADD;
+ case Intrinsic::sadd_with_overflow:
+ return OCF_SIGNED_ADD;
+ case Intrinsic::usub_with_overflow:
+ return OCF_UNSIGNED_SUB;
+ case Intrinsic::ssub_with_overflow:
+ return OCF_SIGNED_SUB;
+ case Intrinsic::umul_with_overflow:
+ return OCF_UNSIGNED_MUL;
+ case Intrinsic::smul_with_overflow:
+ return OCF_SIGNED_MUL;
+ }
+}
+
+/// \brief An IRBuilder inserter that adds new instructions to the instcombine
+/// worklist.
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+ : public IRBuilderDefaultInserter {
+ InstCombineWorklist &Worklist;
+ AssumptionCache *AC;
+
+public:
+ InstCombineIRInserter(InstCombineWorklist &WL, AssumptionCache *AC)
+ : Worklist(WL), AC(AC) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+
+ using namespace llvm::PatternMatch;
+ if (match(I, m_Intrinsic<Intrinsic::assume>()))
+ AC->registerAssumption(cast<CallInst>(I));
+ }
+};
+
+/// \brief The core instruction combiner logic.
+///
+/// This class provides both the logic to recursively visit instructions and
+/// combine them, as well as the pass infrastructure for running this as part
+/// of the LLVM pass pipeline.
+class LLVM_LIBRARY_VISIBILITY InstCombiner
+ : public InstVisitor<InstCombiner, Instruction *> {
+ // FIXME: These members shouldn't be public.
+public:
+ /// \brief A worklist of the instructions that need to be simplified.
+ InstCombineWorklist &Worklist;
+
+ /// \brief An IRBuilder that automatically inserts new instructions into the
+ /// worklist.
+ typedef IRBuilder<TargetFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+private:
+ // Mode in which we are running the combiner.
+ const bool MinimizeSize;
+ /// Enable combines that trigger rarely but are costly in compiletime.
+ const bool ExpensiveCombines;
+
+ AliasAnalysis *AA;
+
+ // Required analyses.
+ // FIXME: These can never be null and should be references.
+ AssumptionCache *AC;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ const DataLayout &DL;
+
+ // Optional analyses. When non-null, these can both be used to do better
+ // combining and will be updated to reflect any changes.
+ LoopInfo *LI;
+
+ bool MadeIRChange;
+
+public:
+ InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
+ bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
+ AssumptionCache *AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
+ : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
+ ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
+ DL(DL), LI(LI), MadeIRChange(false) {}
+
+ /// \brief Run the combiner over the entire worklist until it is empty.
+ ///
+ /// \returns true if the IR is changed.
+ bool run();
+
+ AssumptionCache *getAssumptionCache() const { return AC; }
+
+ const DataLayout &getDataLayout() const { return DL; }
+
+ DominatorTree *getDominatorTree() const { return DT; }
+
+ LoopInfo *getLoopInfo() const { return LI; }
+
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Value *foldFMulConst(Instruction *FMulOrDiv, Constant *C,
+ Instruction *InsertBefore);
+ Instruction *visitFMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
+ Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
+ Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
+ Value *B, Value *C);
+ Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
+ Value *B, Value *C);
+ Instruction *visitOr(BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = nullptr);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1, ConstantInt *CI2);
+ Instruction *FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1, ConstantInt *CI2);
+ Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred);
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other);
+ Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *FoldItoFPtoI(Instruction &FI);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitAllocSite(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitReturnInst(ReturnInst &RI);
+ Instruction *visitInsertValueInst(InsertValueInst &IV);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+ Instruction *visitLandingPadInst(LandingPadInst &LI);
+ Instruction *visitVAStartInst(VAStartInst &I);
+ Instruction *visitVACopyInst(VACopyInst &I);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return nullptr; }
+
+ // True when DB dominates all uses of DI execpt UI.
+ // UI must be in the same block as DI.
+ // The routine checks that the DI parent and DB are different.
+ bool dominatesAllUses(const Instruction *DI, const Instruction *UI,
+ const BasicBlock *DB) const;
+
+ // Replace select with select operand SIOpd in SI-ICmp sequence when possible
+ bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
+ const unsigned SIOpd);
+
+private:
+ bool ShouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
+ bool ShouldChangeType(Type *From, Type *To) const;
+ Value *dyn_castNegVal(Value *V) const;
+ Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
+ Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value *> &NewIndices);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+
+ /// \brief Classify whether a cast is worth optimizing.
+ ///
+ /// Returns true if the cast from "V to Ty" actually results in any code
+ /// being generated and is interesting to optimize out. If the cast can be
+ /// eliminated by some other simple transformation, we prefer to do the
+ /// simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
+ Type *Ty);
+
+ /// \brief Try to optimize a sequence of instructions checking if an operation
+ /// on LHS and RHS overflows.
+ ///
+ /// If this overflow check is done via one of the overflow check intrinsics,
+ /// then CtxI has to be the call instruction calling that intrinsic. If this
+ /// overflow check is done by arithmetic followed by a compare, then CtxI has
+ /// to be the arithmetic instruction.
+ ///
+ /// If a simplification is possible, stores the simplified result of the
+ /// operation in OperationResult and result of the overflow check in
+ /// OverflowResult, and return true. If no simplification is possible,
+ /// returns false.
+ bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ Instruction &CtxI, Value *&OperationResult,
+ Constant *&OverflowResult);
+
+ Instruction *visitCallSite(CallSite CS);
+ Instruction *tryOptimizeCall(CallInst *CI);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
+ Value *EmitGEPOffset(User *GEP);
+ Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
+ Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
+ Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
+
+public:
+ /// \brief Inserts an instruction \p New before instruction \p Old
+ ///
+ /// Also adds the new instruction to the worklist and returns \p New so that
+ /// it is suitable for use as the return from the visitation patterns.
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && !New->getParent() &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(Old.getIterator(), New); // Insert inst
+ Worklist.Add(New);
+ return New;
+ }
+
+ /// \brief Same as InsertNewInstBefore, but also sets the debug loc.
+ Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
+ New->setDebugLoc(Old.getDebugLoc());
+ return InsertNewInstBefore(New, Old);
+ }
+
+ /// \brief A combiner-aware RAUW-like routine.
+ ///
+ /// This method is to be used when an instruction is found to be dead,
+ /// replaceable with another preexisting expression. Here we add all uses of
+ /// I to the worklist, replace all uses of I with the new value, then return
+ /// I, so that the inst combiner will know that I was modified.
+ Instruction *replaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty()) return nullptr;
+
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ DEBUG(dbgs() << "IC: Replacing " << I << "\n"
+ << " with " << *V << '\n');
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ /// Creates a result tuple for an overflow intrinsic \p II with a given
+ /// \p Result and a constant \p Overflow value.
+ Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result,
+ Constant *Overflow) {
+ Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
+ StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Result, 0);
+ }
+
+ /// \brief Combiner aware instruction erasure.
+ ///
+ /// When dealing with an instruction that has side effects or produces a void
+ /// value, we can't rely on DCE to delete the instruction. Instead, visit
+ /// methods should return the value returned by this function.
+ Instruction *eraseInstFromFunction(Instruction &I) {
+ DEBUG(dbgs() << "IC: ERASE " << I << '\n');
+
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (Use &Operand : I.operands())
+ if (auto *Inst = dyn_cast<Instruction>(Operand))
+ Worklist.Add(Inst);
+ }
+ Worklist.Remove(&I);
+ I.eraseFromParent();
+ MadeIRChange = true;
+ return nullptr; // Don't do anything with FI
+ }
+
+ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth, Instruction *CxtI) const {
+ return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
+ DT);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
+ Instruction *CxtI = nullptr) const {
+ return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AC, CxtI, DT);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
+ Instruction *CxtI = nullptr) const {
+ return llvm::ComputeNumSignBits(Op, DL, Depth, AC, CxtI, DT);
+ }
+ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
+ DT);
+ }
+ OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+ const Instruction *CxtI) {
+ return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, AC, CxtI, DT);
+ }
+ OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+ const Instruction *CxtI) {
+ return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, AC, CxtI, DT);
+ }
+
+private:
+ /// \brief Performs a few simplifications for operators which are associative
+ /// or commutative.
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+ /// \brief Tries to simplify binary operations which some other binary
+ /// operation distributes over.
+ ///
+ /// It does this by either by factorizing out common terms (eg "(A*B)+(A*C)"
+ /// -> "A*(B+C)") or expanding out if this results in simplifications (eg: "A
+ /// & (B | C) -> (A&B) | (A&C)" if this is a win). Returns the simplified
+ /// value, or null if it didn't simplify.
+ Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
+
+ /// \brief Attempts to replace V with a simpler value based on the demanded
+ /// bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth,
+ Instruction *CxtI);
+ bool SimplifyDemandedBits(Use &U, const APInt &DemandedMask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0);
+ /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
+ /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
+ Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
+ const APInt &DemandedMask, APInt &KnownZero,
+ APInt &KnownOne);
+
+ /// \brief Tries to simplify operands to an integer instruction based on its
+ /// demanded bits.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts, unsigned Depth = 0);
+
+ Value *SimplifyVectorOp(BinaryOperator &Inst);
+ Value *SimplifyBSwap(BinaryOperator &Inst);
+
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ Instruction *FoldOpIntoPhi(Instruction &I);
+
+ /// \brief Try to rotate an operation below a PHI node, using PHI nodes for
+ /// its operands.
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN);
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned,
+ bool Inside);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+ Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
+
+ /// \brief Returns a value X such that Val = X * Scale, or null if none.
+ ///
+ /// If the multiplication is known not to overflow then NoSignedWrap is set.
+ Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
+};
+
+} // end namespace llvm.
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 000000000000..d88456ee4adc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,1421 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "instcombine"
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::AddrSpaceCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return pointsToConstantGlobal(CE->getOperand(0));
+ }
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
+ ValuesToInspect.push_back(std::make_pair(V, false));
+ while (!ValuesToInspect.empty()) {
+ auto ValuePair = ValuesToInspect.pop_back_val();
+ const bool IsOffset = ValuePair.second;
+ for (auto &U : ValuePair.first->uses()) {
+ Instruction *I = cast<Instruction>(U.getUser());
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
+
+ if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
+ // If uses of the bitcast are ok, we are ok.
+ ValuesToInspect.push_back(std::make_pair(I, IsOffset));
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ ValuesToInspect.push_back(
+ std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
+ continue;
+ }
+
+ if (auto CS = CallSite(I)) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(&U))
+ continue;
+
+ unsigned DataOpNo = CS.getDataOperandNo(&U);
+ bool IsArgOperand = CS.isArgOperand(&U);
+
+ // Inalloca arguments are clobbered by the call.
+ if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
+ return false;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (IsArgOperand && CS.isByValArgument(DataOpNo))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
+ continue;
+ }
+ }
+
+ // If this is isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
+ if (!MI)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (U.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (U.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ MemTransferInst *TheCopy = nullptr;
+ if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+ return TheCopy;
+ return nullptr;
+}
+
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+ // Check for array size of 1 (scalar allocation).
+ if (!AI.isArrayAllocation()) {
+ // i32 1 is the canonical array size for scalar allocations.
+ if (AI.getArraySize()->getType()->isIntegerTy(32))
+ return nullptr;
+
+ // Canonicalize it.
+ Value *V = IC.Builder->getInt32(1);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+
+ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It(New);
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
+ IC.InsertNewInstBefore(GEP, *It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.replaceInstUsesWith(AI, GEP);
+ }
+
+ if (isa<UndefValue>(AI.getArraySize()))
+ return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ if (auto *I = simplifyAllocaArraySize(*this, AI))
+ return I;
+
+ if (AI.getAllocatedType()->isSized()) {
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
+
+ // Move all alloca's of zero byte objects to the entry block and merge them
+ // together. Note that we only do this for alloca's, because malloc should
+ // allocate and return a unique pointer, even for a zero byte allocation.
+ if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ // For a zero sized alloca there is no point in doing an array allocation.
+ // This is helpful if the array size is a complicated expression not used
+ // elsewhere.
+ if (AI.isArrayAllocation()) {
+ AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
+ return &AI;
+ }
+
+ // Get the first instruction in the entry block.
+ BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
+ Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
+ if (FirstInst != &AI) {
+ // If the entry block doesn't start with a zero-size alloca then move
+ // this one to the start of the entry block. There is no problem with
+ // dominance as the array size was forced to a constant earlier already.
+ AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
+ if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
+ DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ AI.moveBefore(FirstInst);
+ return &AI;
+ }
+
+ // If the alignment of the entry block alloca is 0 (unspecified),
+ // assign it the preferred alignment.
+ if (EntryAI->getAlignment() == 0)
+ EntryAI->setAlignment(
+ DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ // Replace this zero-sized alloca with the one at the start of the entry
+ // block after ensuring that the address will be aligned enough for both
+ // types.
+ unsigned MaxAlign = std::max(EntryAI->getAlignment(),
+ AI.getAlignment());
+ EntryAI->setAlignment(MaxAlign);
+ if (AI.getType() != EntryAI->getType())
+ return new BitCastInst(EntryAI, AI.getType());
+ return replaceInstUsesWith(AI, EntryAI);
+ }
+ }
+ }
+
+ if (AI.getAlignment()) {
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ unsigned SourceAlign = getOrEnforceKnownAlignment(
+ Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
+ if (AI.getAlignment() <= SourceAlign) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ eraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+ Instruction *NewI = replaceInstUsesWith(AI, Cast);
+ eraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
+ }
+ }
+
+ // At last, use the generic allocation site handler to aggressively remove
+ // unused allocas.
+ return visitAllocSite(AI);
+}
+
+/// \brief Helper to combine a load to a new type.
+///
+/// This just does the work of combining a load to a new type. It handles
+/// metadata, etc., and returns the new instruction. The \c NewTy should be the
+/// loaded *value* type. This will convert it to a pointer, cast the operand to
+/// that pointer type, load it, etc.
+///
+/// Note that this will create all of the instructions with whatever insert
+/// point the \c InstCombiner currently is using.
+static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
+ const Twine &Suffix = "") {
+ Value *Ptr = LI.getPointerOperand();
+ unsigned AS = LI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ LI.getAllMetadata(MD);
+
+ LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
+ IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
+ LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+ NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ MDBuilder MDB(NewLoad->getContext());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a load instruction changing *only its type*.
+ // The only metadata it makes sense to drop is metadata which is invalidated
+ // when the pointer type changes. This should essentially never be the case
+ // in LLVM, but we explicitly switch over only known metadata to be
+ // conservatively correct. If you are adding metadata to LLVM which pertains
+ // to loads, you almost certainly want to add it here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ // All of these directly apply.
+ NewLoad->setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_nonnull:
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
+ NewLoad->setMetadata(ID, N);
+ break;
+ }
+ // If it's integral now, translate it to !range metadata.
+ if (NewTy->isIntegerTy()) {
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt =
+ ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLoad->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+ }
+ break;
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These only directly apply if the new type is also a pointer.
+ if (NewTy->isPointerTy())
+ NewLoad->setMetadata(ID, N);
+ break;
+ case LLVMContext::MD_range:
+ // FIXME: It would be nice to propagate this in some way, but the type
+ // conversions make it hard. If the new type is a pointer, we could
+ // translate it to !nonnull metadata.
+ break;
+ }
+ }
+ return NewLoad;
+}
+
+/// \brief Combine a store to a new type.
+///
+/// Returns the newly created store instruction.
+static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+ Value *Ptr = SI.getPointerOperand();
+ unsigned AS = SI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ SI.getAllMetadata(MD);
+
+ StoreInst *NewStore = IC.Builder->CreateAlignedStore(
+ V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+ SI.getAlignment(), SI.isVolatile());
+ NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a store instruction changing *only its
+ // type*. The only metadata it makes sense to drop is metadata which is
+ // invalidated when the pointer type changes. This should essentially
+ // never be the case in LLVM, but we explicitly switch over only known
+ // metadata to be conservatively correct. If you are adding metadata to
+ // LLVM which pertains to stores, you almost certainly want to add it
+ // here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ // All of these directly apply.
+ NewStore->setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_nonnull:
+ case LLVMContext::MD_range:
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These don't apply for stores.
+ break;
+ }
+ }
+
+ return NewStore;
+}
+
+/// \brief Combine loads to match the type of their uses' value after looking
+/// through intervening bitcasts.
+///
+/// The core idea here is that if the result of a load is used in an operation,
+/// we should load the type most conducive to that operation. For example, when
+/// loading an integer and converting that immediately to a pointer, we should
+/// instead directly load a pointer.
+///
+/// However, this routine must never change the width of a load or the number of
+/// loads as that would introduce a semantic change. This combine is expected to
+/// be a semantic no-op which just allows loads to more closely model the types
+/// of their consuming operations.
+///
+/// Currently, we also refuse to change the precise type used for an atomic load
+/// or a volatile load. This is debatable, and might be reasonable to change
+/// later. However, it is risky in case some backend or other part of LLVM is
+/// relying on the exact type loaded to select appropriate atomic operations.
+static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
+ // FIXME: We could probably with some care handle both volatile and ordered
+ // atomic loads here but it isn't clear that this is important.
+ if (!LI.isUnordered())
+ return nullptr;
+
+ if (LI.use_empty())
+ return nullptr;
+
+ Type *Ty = LI.getType();
+ const DataLayout &DL = IC.getDataLayout();
+
+ // Try to canonicalize loads which are only ever stored to operate over
+ // integers instead of any other type. We only do this when the loaded type
+ // is sized and has a size exactly the same as its store size and the store
+ // size is a legal integer type.
+ if (!Ty->isIntegerTy() && Ty->isSized() &&
+ DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
+ if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ return SI && SI->getPointerOperand() != &LI;
+ })) {
+ LoadInst *NewLoad = combineLoadToNewType(
+ IC, LI,
+ Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
+ // Replace all the stores with stores of the newly loaded value.
+ for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
+ auto *SI = cast<StoreInst>(*UI++);
+ IC.Builder->SetInsertPoint(SI);
+ combineStoreToNewValue(IC, *SI, NewLoad);
+ IC.eraseInstFromFunction(*SI);
+ }
+ assert(LI.use_empty() && "Failed to remove all users of the load!");
+ // Return the old load so the combiner can delete it safely.
+ return &LI;
+ }
+ }
+
+ // Fold away bit casts of the loaded value by loading the desired type.
+ // We can do this for BitCastInsts as well as casts from and to pointer types,
+ // as long as those are noops (i.e., the source or dest type have the same
+ // bitwidth as the target's pointers).
+ if (LI.hasOneUse())
+ if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
+ if (CI->isNoopCast(DL)) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+ CI->replaceAllUsesWith(NewLoad);
+ IC.eraseInstFromFunction(*CI);
+ return &LI;
+ }
+ }
+
+ // FIXME: We should also canonicalize loads of vectors when their elements are
+ // cast to other types.
+ return nullptr;
+}
+
+static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
+ // FIXME: We could probably with some care handle both volatile and atomic
+ // stores here but it isn't clear that this is important.
+ if (!LI.isSimple())
+ return nullptr;
+
+ Type *T = LI.getType();
+ if (!T->isAggregateType())
+ return nullptr;
+
+ StringRef Name = LI.getName();
+ assert(LI.getAlignment() && "Alignment must be set at this point");
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+ // If the struct only have one element, we unpack.
+ auto NumElements = ST->getNumElements();
+ if (NumElements == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
+ ".unpack");
+ return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, Name));
+ }
+
+ // We don't want to break loads with padding here as we'd loose
+ // the knowledge that padding exists for the rest of the pipeline.
+ const DataLayout &DL = IC.getDataLayout();
+ auto *SL = DL.getStructLayout(ST);
+ if (SL->hasPadding())
+ return nullptr;
+
+ auto Align = LI.getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(ST);
+
+ auto *Addr = LI.getPointerOperand();
+ auto *IdxType = Type::getInt32Ty(T->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+
+ Value *V = UndefValue::get(T);
+ for (unsigned i = 0; i < NumElements; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+ Name + ".elt");
+ auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
+ auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+ V = IC.Builder->CreateInsertValue(V, L, i);
+ }
+
+ V->setName(Name);
+ return IC.replaceInstUsesWith(LI, V);
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+ auto *ET = AT->getElementType();
+ auto NumElements = AT->getNumElements();
+ if (NumElements == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
+ return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, Name));
+ }
+
+ // Bail out if the array is too large. Ideally we would like to optimize
+ // arrays of arbitrary size but this has a terrible impact on compile time.
+ // The threshold here is chosen arbitrarily, maybe needs a little bit of
+ // tuning.
+ if (NumElements > 1024)
+ return nullptr;
+
+ const DataLayout &DL = IC.getDataLayout();
+ auto EltSize = DL.getTypeAllocSize(ET);
+ auto Align = LI.getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(T);
+
+ auto *Addr = LI.getPointerOperand();
+ auto *IdxType = Type::getInt64Ty(T->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+
+ Value *V = UndefValue::get(T);
+ uint64_t Offset = 0;
+ for (uint64_t i = 0; i < NumElements; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+ Name + ".elt");
+ auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
+ Name + ".unpack");
+ V = IC.Builder->CreateInsertValue(V, L, i);
+ Offset += EltSize;
+ }
+
+ V->setName(Name);
+ return IC.replaceInstUsesWith(LI, V);
+ }
+
+ return nullptr;
+}
+
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are, not only dereferenceable, but also definitively less than
+// or equal to the provided maximum size, then return true. Otherwise, return
+// false (constant global values and allocas fall into this category).
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+ const DataLayout &DL) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist(1, V);
+
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = P->stripPointerCasts();
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
+ continue;
+ }
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+ if (GA->isInterposable())
+ return false;
+ Worklist.push_back(GA->getAliasee());
+ continue;
+ }
+
+ // If we know how big this object is, and it is less than MaxSize, continue
+ // searching. Otherwise, return false.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+ if (!AI->getAllocatedType()->isSized())
+ return false;
+
+ ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!CS)
+ return false;
+
+ uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
+ // Make sure that, even if the multiplication below would wrap as an
+ // uint64_t, we still do the right thing.
+ if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+ return false;
+ continue;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+ return false;
+
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
+ if (InitSize > MaxSize)
+ return false;
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty());
+
+ return true;
+}
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value but zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+// ...
+// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+// ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. Also, we could
+// also search through non-zero constant indices if we kept track of the
+// offsets those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+ Instruction *MemI, unsigned &Idx) {
+ if (GEPI->getNumOperands() < 2)
+ return false;
+
+ // Find the first non-zero index of a GEP. If all indices are zero, return
+ // one past the last index.
+ auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+ unsigned I = 1;
+ for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+ Value *V = GEPI->getOperand(I);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ if (CI->isZero())
+ continue;
+
+ break;
+ }
+
+ return I;
+ };
+
+ // Skip through initial 'zero' indices, and find the corresponding pointer
+ // type. See if the next index is not a constant.
+ Idx = FirstNZIdx(GEPI);
+ if (Idx == GEPI->getNumOperands())
+ return false;
+ if (isa<Constant>(GEPI->getOperand(Idx)))
+ return false;
+
+ SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+ Type *AllocTy =
+ GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
+ if (!AllocTy || !AllocTy->isSized())
+ return false;
+ const DataLayout &DL = IC.getDataLayout();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
+
+ // If there are more indices after the one we might replace with a zero, make
+ // sure they're all non-negative. If any of them are negative, the overall
+ // address being computed might be before the base address determined by the
+ // first non-zero index.
+ auto IsAllNonNegative = [&]() {
+ for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+ bool KnownNonNegative, KnownNegative;
+ IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+ KnownNegative, 0, MemI);
+ if (KnownNonNegative)
+ continue;
+ return false;
+ }
+
+ return true;
+ };
+
+ // FIXME: If the GEP is not inbounds, and there are extra indices after the
+ // one we'll replace, those could cause the address computation to wrap
+ // (rendering the IsAllNonNegative() check below insufficient). We can do
+ // better, ignoring zero indices (and other indices we can prove small
+ // enough not to wrap).
+ if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+ return false;
+
+ // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+ // also known to be dereferenceable.
+ return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+ IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+ T &MemI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+ unsigned Idx;
+ if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+ Instruction *NewGEPI = GEPI->clone();
+ NewGEPI->setOperand(Idx,
+ ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+ NewGEPI->insertBefore(GEPI);
+ MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+ return NewGEPI;
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Try to canonicalize the loaded type.
+ if (Instruction *Res = combineLoadToOperationType(*this, LI))
+ return Res;
+
+ // Attempt to improve the alignment.
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign =
+ LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+ Worklist.Add(NewGEPI);
+ return &LI;
+ }
+
+ if (Instruction *Res = unpackLoadToAggregate(*this, LI))
+ return Res;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+ // where there are several consecutive memory accesses to the same location,
+ // separated by a few arithmetic operations.
+ BasicBlock::iterator BBI(LI);
+ AAMDNodes AATags;
+ bool IsLoadCSE = false;
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
+ DefMaxInstsToScan, AA, &AATags, &IsLoadCSE)) {
+ if (IsLoadCSE) {
+ LoadInst *NLI = cast<LoadInst>(AvailableVal);
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
+ combineMetadata(NLI, &LI, KnownIDs);
+ };
+
+ return replaceInstUsesWith(
+ LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
+ LI.getName() + ".cast"));
+ }
+
+ // None of the following transforms are legal for volatile/ordered atomic
+ // loads. Most of them do apply for unordered atomics.
+ if (!LI.isUnordered()) return nullptr;
+
+ // load(gep null, ...) -> unreachable
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+ // helps alias analysis out a lot, allows many others simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ unsigned Align = LI.getAlignment();
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
+ LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
+ assert(LI.isUnordered() && "implied by above");
+ V1->setAlignment(Align);
+ V1->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ V2->setAlignment(Align);
+ V2->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
+ LI.getPointerAddressSpace() == 0) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
+ LI.getPointerAddressSpace() == 0) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return nullptr;
+}
+
+/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast.
+///
+/// \returns underlying value that was "cast", or nullptr otherwise.
+///
+/// For example, if we have:
+///
+/// %E0 = extractelement <2 x double> %U, i32 0
+/// %V0 = insertvalue [2 x double] undef, double %E0, 0
+/// %E1 = extractelement <2 x double> %U, i32 1
+/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
+///
+/// and the layout of a <2 x double> is isomorphic to a [2 x double],
+/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
+/// Note that %U may contain non-undef values where %V1 has undef.
+static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
+ Value *U = nullptr;
+ while (auto *IV = dyn_cast<InsertValueInst>(V)) {
+ auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
+ if (!E)
+ return nullptr;
+ auto *W = E->getVectorOperand();
+ if (!U)
+ U = W;
+ else if (U != W)
+ return nullptr;
+ auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
+ if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
+ return nullptr;
+ V = IV->getAggregateOperand();
+ }
+ if (!isa<UndefValue>(V) ||!U)
+ return nullptr;
+
+ auto *UT = cast<VectorType>(U->getType());
+ auto *VT = V->getType();
+ // Check that types UT and VT are bitwise isomorphic.
+ const auto &DL = IC.getDataLayout();
+ if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
+ return nullptr;
+ }
+ if (auto *AT = dyn_cast<ArrayType>(VT)) {
+ if (AT->getNumElements() != UT->getNumElements())
+ return nullptr;
+ } else {
+ auto *ST = cast<StructType>(VT);
+ if (ST->getNumElements() != UT->getNumElements())
+ return nullptr;
+ for (const auto *EltT : ST->elements()) {
+ if (EltT != UT->getElementType())
+ return nullptr;
+ }
+ }
+ return U;
+}
+
+/// \brief Combine stores to match the type of value being stored.
+///
+/// The core idea here is that the memory does not have any intrinsic type and
+/// where we can we should match the type of a store to the type of value being
+/// stored.
+///
+/// However, this routine must never change the width of a store or the number of
+/// stores as that would introduce a semantic change. This combine is expected to
+/// be a semantic no-op which just allows stores to more closely model the types
+/// of their incoming values.
+///
+/// Currently, we also refuse to change the precise type used for an atomic or
+/// volatile store. This is debatable, and might be reasonable to change later.
+/// However, it is risky in case some backend or other part of LLVM is relying
+/// on the exact type stored to select appropriate atomic operations.
+///
+/// \returns true if the store was successfully combined away. This indicates
+/// the caller must erase the store instruction. We have to let the caller erase
+/// the store instruction as otherwise there is no way to signal whether it was
+/// combined or not: IC.EraseInstFromFunction returns a null pointer.
+static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: We could probably with some care handle both volatile and ordered
+ // atomic stores here but it isn't clear that this is important.
+ if (!SI.isUnordered())
+ return false;
+
+ Value *V = SI.getValueOperand();
+
+ // Fold away bit casts of the stored value by storing the original type.
+ if (auto *BC = dyn_cast<BitCastInst>(V)) {
+ V = BC->getOperand(0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+
+ if (Value *U = likeBitCastFromVector(IC, V)) {
+ combineStoreToNewValue(IC, SI, U);
+ return true;
+ }
+
+ // FIXME: We should also canonicalize stores of vectors when their elements
+ // are cast to other types.
+ return false;
+}
+
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: We could probably with some care handle both volatile and atomic
+ // stores here but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
+
+ Value *V = SI.getValueOperand();
+ Type *T = V->getType();
+
+ if (!T->isAggregateType())
+ return false;
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+ // If the struct only have one element, we unpack.
+ unsigned Count = ST->getNumElements();
+ if (Count == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+
+ // We don't want to break loads with padding here as we'd loose
+ // the knowledge that padding exists for the rest of the pipeline.
+ const DataLayout &DL = IC.getDataLayout();
+ auto *SL = DL.getStructLayout(ST);
+ if (SL->hasPadding())
+ return false;
+
+ auto Align = SI.getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(ST);
+
+ SmallString<16> EltName = V->getName();
+ EltName += ".elt";
+ auto *Addr = SI.getPointerOperand();
+ SmallString<16> AddrName = Addr->getName();
+ AddrName += ".repack";
+
+ auto *IdxType = Type::getInt32Ty(ST->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+ for (unsigned i = 0; i < Count; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+ AddrName);
+ auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+ auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
+ IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ }
+
+ return true;
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+ // If the array only have one element, we unpack.
+ auto NumElements = AT->getNumElements();
+ if (NumElements == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+
+ // Bail out if the array is too large. Ideally we would like to optimize
+ // arrays of arbitrary size but this has a terrible impact on compile time.
+ // The threshold here is chosen arbitrarily, maybe needs a little bit of
+ // tuning.
+ if (NumElements > 1024)
+ return false;
+
+ const DataLayout &DL = IC.getDataLayout();
+ auto EltSize = DL.getTypeAllocSize(AT->getElementType());
+ auto Align = SI.getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(T);
+
+ SmallString<16> EltName = V->getName();
+ EltName += ".elt";
+ auto *Addr = SI.getPointerOperand();
+ SmallString<16> AddrName = Addr->getName();
+ AddrName += ".repack";
+
+ auto *IdxType = Type::getInt64Ty(T->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+
+ uint64_t Offset = 0;
+ for (uint64_t i = 0; i < NumElements; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+ AddrName);
+ auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+ auto EltAlign = MinAlign(Align, Offset);
+ IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ Offset += EltSize;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come form identical arithmetic instructions.
+ // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+ // its only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ // Try to canonicalize the stored type.
+ if (combineStoreToValueType(*this, SI))
+ return eraseInstFromFunction(SI);
+
+ // Attempt to improve the alignment.
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign =
+ StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+
+ // Try to canonicalize the stored type.
+ if (unpackStoreToAggregate(*this, SI))
+ return eraseInstFromFunction(SI);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+ Worklist.Add(NewGEPI);
+ return &SI;
+ }
+
+ // Don't hack volatile/ordered stores.
+ // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+ if (!SI.isUnordered()) return nullptr;
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return eraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return eraseInstFromFunction(SI);
+ }
+ }
+ }
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
+ BasicBlock::iterator BBI(SI);
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen,
+ // and we skip pointer-to-pointer bitcasts, which are NOPs.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ eraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the loaded value is from
+ // the pointer we're loading and is producing the pointer we're storing,
+ // then *this* store is dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+ assert(SI.isUnordered() && "can't eliminate ordering operation");
+ return eraseInstFromFunction(SI);
+ }
+
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ Worklist.Add(U); // Dropped a use.
+ }
+ return nullptr; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val))
+ return eraseInstFromFunction(SI);
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions), and if the block ends with an
+ // unconditional branch, try to move it to the successor block.
+ BBI = SI.getIterator();
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return nullptr; // xform done!
+
+ return nullptr;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ assert(SI.isUnordered() &&
+ "this code has not been auditted for volatile or ordered store case");
+
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // if so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *P = *PI;
+ BasicBlock *OtherBB = nullptr;
+
+ if (P != StoreBB)
+ OtherBB = P;
+
+ if (++PI == pred_end(DestBB))
+ return false;
+
+ P = *PI;
+ if (P != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = P;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI(OtherBB->getTerminator());
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case. there is an instruction before the branch.
+ StoreInst *OtherStore = nullptr;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ if (BBI==OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, isn't a store to the same location, or is not the
+ // right kind of store, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ !SI.isSameOperationAs(OtherStore))
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ !SI.isSameOperationAs(OtherStore))
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstInsertionPt();
+ StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
+ SI.isVolatile(),
+ SI.getAlignment(),
+ SI.getOrdering(),
+ SI.getSynchScope());
+ InsertNewInstBefore(NewSI, *BBI);
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
+
+ // If the two stores had AA tags, merge them.
+ AAMDNodes AATags;
+ SI.getAAMetadata(AATags);
+ if (AATags) {
+ OtherStore->getAAMetadata(AATags, /* Merge = */ true);
+ NewSI->setAAMetadata(AATags);
+ }
+
+ // Nuke the old stores.
+ eraseInstFromFunction(SI);
+ eraseInstFromFunction(*OtherStore);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
new file mode 100644
index 000000000000..788097f33f12
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -0,0 +1,1543 @@
+//===- InstCombineMulDivRem.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
+// srem, urem, frem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+
+/// The specific integer value is used in a context where it is known to be
+/// non-zero. If this allows us to simplify the computation, do so and return
+/// the new operand, otherwise return null.
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
+ Instruction &CxtI) {
+ // If V has multiple uses, then we would have to do more analysis to determine
+ // if this is safe. For example, the use could be in dynamically unreached
+ // code.
+ if (!V->hasOneUse()) return nullptr;
+
+ bool MadeChange = false;
+
+ // ((1 << A) >>u B) --> (1 << (A-B))
+ // Because V cannot be zero, we know that B is less than A.
+ Value *A = nullptr, *B = nullptr, *One = nullptr;
+ if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
+ match(One, m_One())) {
+ A = IC.Builder->CreateSub(A, B);
+ return IC.Builder->CreateShl(One, A);
+ }
+
+ // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
+ // inexact. Similarly for <<.
+ BinaryOperator *I = dyn_cast<BinaryOperator>(V);
+ if (I && I->isLogicalShift() &&
+ isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+ IC.getAssumptionCache(), &CxtI,
+ IC.getDominatorTree())) {
+ // We know that this is an exact/nuw shift and that the input is a
+ // non-zero context as well.
+ if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
+ I->setOperand(0, V2);
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
+ I->setIsExact();
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
+ I->setHasNoUnsignedWrap();
+ MadeChange = true;
+ }
+ }
+
+ // TODO: Lots more we could do here:
+ // If V is a phi node, we can call this on each of its operands.
+ // "select cond, X, 0" can simplify to "X".
+
+ return MadeChange ? V : nullptr;
+}
+
+
+/// True if the multiply can not be expressed in an int this size.
+static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
+ bool IsSigned) {
+ bool Overflow;
+ if (IsSigned)
+ Product = C1.smul_ov(C2, Overflow);
+ else
+ Product = C1.umul_ov(C2, Overflow);
+
+ return Overflow;
+}
+
+/// \brief True if C2 is a multiple of C1. Quotient contains C2/C1.
+static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
+ bool IsSigned) {
+ assert(C1.getBitWidth() == C2.getBitWidth() &&
+ "Inconsistent width of constants!");
+
+ // Bail if we will divide by zero.
+ if (C2.isMinValue())
+ return false;
+
+ // Bail if we would divide INT_MIN by -1.
+ if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
+ return false;
+
+ APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+ if (IsSigned)
+ APInt::sdivrem(C1, C2, Quotient, Remainder);
+ else
+ APInt::udivrem(C1, C2, Quotient, Remainder);
+
+ return Remainder.isMinValue();
+}
+
+/// \brief A helper routine of InstCombiner::visitMul().
+///
+/// If C is a vector of known powers of 2, then this function returns
+/// a new vector obtained from C replacing each element with its logBase2.
+/// Return a null pointer otherwise.
+static Constant *getLogBase2Vector(ConstantDataVector *CV) {
+ const APInt *IVal;
+ SmallVector<Constant *, 4> Elts;
+
+ for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
+ Constant *Elt = CV->getElementAsConstant(I);
+ if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
+ return nullptr;
+ Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2()));
+ }
+
+ return ConstantVector::get(Elts);
+}
+
+/// \brief Return true if we can prove that:
+/// (mul LHS, RHS) === (mul nsw LHS, RHS)
+bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
+ Instruction &CxtI) {
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
+ // This means if we have enough leading sign bits in the operands
+ // we can guarantee that the result does not overflow.
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+
+ // Note that underestimating the number of sign bits gives a more
+ // conservative answer.
+ unsigned SignBits =
+ ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
+
+ // First handle the easy case: if we have enough sign bits there's
+ // definitely no overflow.
+ if (SignBits > BitWidth + 1)
+ return true;
+
+ // There are two ambiguous cases where there can be no overflow:
+ // SignBits == BitWidth + 1 and
+ // SignBits == BitWidth
+ // The second case is difficult to check, therefore we only handle the
+ // first case.
+ if (SignBits == BitWidth + 1) {
+ // It overflows only when both arguments are negative and the true
+ // product is exactly the minimum negative number.
+ // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
+ // For simplicity we just check if at least one side is not negative.
+ bool LHSNonNegative, LHSNegative;
+ bool RHSNonNegative, RHSNegative;
+ ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+ ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
+ if (LHSNonNegative || RHSNonNegative)
+ return true;
+ }
+ return false;
+}
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyMulInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // X * -1 == 0 - X
+ if (match(Op1, m_AllOnes())) {
+ BinaryOperator *BO = BinaryOperator::CreateNeg(Op0, I.getName());
+ if (I.hasNoSignedWrap())
+ BO->setHasNoSignedWrap();
+ return BO;
+ }
+
+ // Also allow combining multiply instructions on vectors.
+ {
+ Value *NewOp;
+ Constant *C1, *C2;
+ const APInt *IVal;
+ if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_Constant(C2)),
+ m_Constant(C1))) &&
+ match(C1, m_APInt(IVal))) {
+ // ((X << C2)*C1) == (X * (C1 << C2))
+ Constant *Shl = ConstantExpr::getShl(C1, C2);
+ BinaryOperator *Mul = cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *BO = BinaryOperator::CreateMul(NewOp, Shl);
+ if (I.hasNoUnsignedWrap() && Mul->hasNoUnsignedWrap())
+ BO->setHasNoUnsignedWrap();
+ if (I.hasNoSignedWrap() && Mul->hasNoSignedWrap() &&
+ Shl->isNotMinSignedValue())
+ BO->setHasNoSignedWrap();
+ return BO;
+ }
+
+ if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
+ Constant *NewCst = nullptr;
+ if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2())
+ // Replace X*(2^C) with X << C, where C is either a scalar or a splat.
+ NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2());
+ else if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(C1))
+ // Replace X*(2^C) with X << C, where C is a vector of known
+ // constant powers of 2.
+ NewCst = getLogBase2Vector(CV);
+
+ if (NewCst) {
+ unsigned Width = NewCst->getType()->getPrimitiveSizeInBits();
+ BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
+
+ if (I.hasNoUnsignedWrap())
+ Shl->setHasNoUnsignedWrap();
+ if (I.hasNoSignedWrap()) {
+ uint64_t V;
+ if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+ Shl->setHasNoSignedWrap();
+ }
+
+ return Shl;
+ }
+ }
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
+ // (Y + const) * (-(2**n)) -> (-constY) * (2**n), for positive nonzero n
+ // The "* (2**n)" thus becomes a potential shifting opportunity.
+ {
+ const APInt & Val = CI->getValue();
+ const APInt &PosVal = Val.abs();
+ if (Val.isNegative() && PosVal.isPowerOf2()) {
+ Value *X = nullptr, *Y = nullptr;
+ if (Op0->hasOneUse()) {
+ ConstantInt *C1;
+ Value *Sub = nullptr;
+ if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
+ Sub = Builder->CreateSub(X, Y, "suba");
+ else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
+ Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+ if (Sub)
+ return
+ BinaryOperator::CreateMul(Sub,
+ ConstantInt::get(Y->getType(), PosVal));
+ }
+ }
+ }
+ }
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ {
+ Value *X;
+ Constant *C1;
+ if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
+ Value *Mul = Builder->CreateMul(C1, Op1);
+ // Only go forward with the transform if C1*CI simplifies to a tidier
+ // constant.
+ if (!match(Mul, m_Mul(m_Value(), m_Value())))
+ return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul);
+ }
+ }
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) { // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1)) {
+ BinaryOperator *BO = BinaryOperator::CreateMul(Op0v, Op1v);
+ if (I.hasNoSignedWrap() &&
+ match(Op0, m_NSWSub(m_Value(), m_Value())) &&
+ match(Op1, m_NSWSub(m_Value(), m_Value())))
+ BO->setHasNoSignedWrap();
+ return BO;
+ }
+ }
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1C = Op1;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
+ }
+ Value *Neg = dyn_castNegVal(Op1C);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ // If the division is exact, X % Y is zero, so we end up with X or -X.
+ if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return replaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
+ else
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1C)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ /// i1 mul -> i1 and.
+ if (I.getType()->getScalarType()->isIntegerTy(1))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ BinaryOperator *BO = nullptr;
+ bool ShlNSW = false;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y)))) {
+ BO = BinaryOperator::CreateShl(Op1, Y);
+ ShlNSW = cast<ShlOperator>(Op0)->hasNoSignedWrap();
+ } else if (match(Op1, m_Shl(m_One(), m_Value(Y)))) {
+ BO = BinaryOperator::CreateShl(Op0, Y);
+ ShlNSW = cast<ShlOperator>(Op1)->hasNoSignedWrap();
+ }
+ if (BO) {
+ if (I.hasNoUnsignedWrap())
+ BO->setHasNoUnsignedWrap();
+ if (I.hasNoSignedWrap() && ShlNSW)
+ BO->setHasNoSignedWrap();
+ return BO;
+ }
+ }
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
+ if (!I.getType()->isVectorTy()) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = nullptr, *OtherOp = nullptr;
+ if (MaskedValueIsZero(Op0, Negative2, 0, &I)) {
+ BoolCast = Op0;
+ OtherOp = Op1;
+ } else if (MaskedValueIsZero(Op1, Negative2, 0, &I)) {
+ BoolCast = Op1;
+ OtherOp = Op0;
+ }
+
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast);
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
+ Changed = true;
+ I.setHasNoSignedWrap(true);
+ }
+
+ if (!I.hasNoUnsignedWrap() &&
+ computeOverflowForUnsignedMul(Op0, Op1, &I) ==
+ OverflowResult::NeverOverflows) {
+ Changed = true;
+ I.setHasNoUnsignedWrap(true);
+ }
+
+ return Changed ? &I : nullptr;
+}
+
+/// Detect pattern log2(Y * 0.5) with corresponding fast math flags.
+static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
+ if (!Op->hasOneUse())
+ return;
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+ if (!II)
+ return;
+ if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+ return;
+ Log2 = II;
+
+ Value *OpLog2Of = II->getArgOperand(0);
+ if (!OpLog2Of->hasOneUse())
+ return;
+
+ Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+ if (!I)
+ return;
+ if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return;
+
+ if (match(I->getOperand(0), m_SpecificFP(0.5)))
+ Y = I->getOperand(1);
+ else if (match(I->getOperand(1), m_SpecificFP(0.5)))
+ Y = I->getOperand(0);
+}
+
+static bool isFiniteNonZeroFp(Constant *C) {
+ if (C->getType()->isVectorTy()) {
+ for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
+ if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
+ return false;
+ }
+ return true;
+ }
+
+ return isa<ConstantFP>(C) &&
+ cast<ConstantFP>(C)->getValueAPF().isFiniteNonZero();
+}
+
+static bool isNormalFp(Constant *C) {
+ if (C->getType()->isVectorTy()) {
+ for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
+ if (!CFP || !CFP->getValueAPF().isNormal())
+ return false;
+ }
+ return true;
+ }
+
+ return isa<ConstantFP>(C) && cast<ConstantFP>(C)->getValueAPF().isNormal();
+}
+
+/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
+/// true iff the given value is FMul or FDiv with one and only one operand
+/// being a normal constant (i.e. not Zero/NaN/Infinity).
+static bool isFMulOrFDivWithConstant(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || (I->getOpcode() != Instruction::FMul &&
+ I->getOpcode() != Instruction::FDiv))
+ return false;
+
+ Constant *C0 = dyn_cast<Constant>(I->getOperand(0));
+ Constant *C1 = dyn_cast<Constant>(I->getOperand(1));
+
+ if (C0 && C1)
+ return false;
+
+ return (C0 && isFiniteNonZeroFp(C0)) || (C1 && isFiniteNonZeroFp(C1));
+}
+
+/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
+/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
+/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
+/// This function is to simplify "FMulOrDiv * C" and returns the
+/// resulting expression. Note that this function could return NULL in
+/// case the constants cannot be folded into a normal floating-point.
+///
+Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C,
+ Instruction *InsertBefore) {
+ assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
+
+ Value *Opnd0 = FMulOrDiv->getOperand(0);
+ Value *Opnd1 = FMulOrDiv->getOperand(1);
+
+ Constant *C0 = dyn_cast<Constant>(Opnd0);
+ Constant *C1 = dyn_cast<Constant>(Opnd1);
+
+ BinaryOperator *R = nullptr;
+
+ // (X * C0) * C => X * (C0*C)
+ if (FMulOrDiv->getOpcode() == Instruction::FMul) {
+ Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C);
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F);
+ } else {
+ if (C0) {
+ // (C0 / X) * C => (C0 * C) / X
+ if (FMulOrDiv->hasOneUse()) {
+ // It would otherwise introduce another div.
+ Constant *F = ConstantExpr::getFMul(C0, C);
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ }
+ } else {
+ // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
+ Constant *F = ConstantExpr::getFDiv(C, C1);
+ if (isNormalFp(F)) {
+ R = BinaryOperator::CreateFMul(Opnd0, F);
+ } else {
+ // (X / C1) * C => X / (C1/C)
+ Constant *F = ConstantExpr::getFDiv(C1, C);
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(Opnd0, F);
+ }
+ }
+ }
+
+ if (R) {
+ R->setHasUnsafeAlgebra(true);
+ InsertNewInstWith(R, *InsertBefore);
+ }
+
+ return R;
+}
+
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (isa<Constant>(Op0))
+ std::swap(Op0, Op1);
+
+ if (Value *V =
+ SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // (fmul X, -1.0) --> (fsub -0.0, X)
+ if (match(Op1, m_SpecificFP(-1.0))) {
+ Constant *NegZero = ConstantFP::getNegativeZero(Op1->getType());
+ Instruction *RI = BinaryOperator::CreateFSub(NegZero, Op0);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+
+ Constant *C = cast<Constant>(Op1);
+ if (AllowReassociate && isFiniteNonZeroFp(C)) {
+ // Let MDC denote an expression in one of these forms:
+ // X * C, C/X, X/C, where C is a constant.
+ //
+ // Try to simplify "MDC * Constant"
+ if (isFMulOrFDivWithConstant(Op0))
+ if (Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I))
+ return replaceInstUsesWith(I, V);
+
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
+ Instruction *FAddSub = dyn_cast<Instruction>(Op0);
+ if (FAddSub &&
+ (FAddSub->getOpcode() == Instruction::FAdd ||
+ FAddSub->getOpcode() == Instruction::FSub)) {
+ Value *Opnd0 = FAddSub->getOperand(0);
+ Value *Opnd1 = FAddSub->getOperand(1);
+ Constant *C0 = dyn_cast<Constant>(Opnd0);
+ Constant *C1 = dyn_cast<Constant>(Opnd1);
+ bool Swap = false;
+ if (C0) {
+ std::swap(C0, C1);
+ std::swap(Opnd0, Opnd1);
+ Swap = true;
+ }
+
+ if (C1 && isFiniteNonZeroFp(C1) && isFMulOrFDivWithConstant(Opnd0)) {
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<Constant>(M1)) ?
+ foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
+ nullptr;
+ if (M0 && M1) {
+ if (Swap && FAddSub->getOpcode() == Instruction::FSub)
+ std::swap(M0, M1);
+
+ Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
+ ? BinaryOperator::CreateFAdd(M0, M1)
+ : BinaryOperator::CreateFSub(M0, M1);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+ }
+ }
+ }
+ }
+
+ if (Op0 == Op1) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
+ // sqrt(X) * sqrt(X) -> X
+ if (AllowReassociate && II->getIntrinsicID() == Intrinsic::sqrt)
+ return replaceInstUsesWith(I, II->getOperand(0));
+
+ // fabs(X) * fabs(X) -> X * X
+ if (II->getIntrinsicID() == Intrinsic::fabs) {
+ Instruction *FMulVal = BinaryOperator::CreateFMul(II->getOperand(0),
+ II->getOperand(0),
+ I.getName());
+ FMulVal->copyFastMathFlags(&I);
+ return FMulVal;
+ }
+ }
+ }
+
+ // Under unsafe algebra do:
+ // X * log2(0.5*Y) = X*log2(Y) - X
+ if (AllowReassociate) {
+ Value *OpX = nullptr;
+ Value *OpY = nullptr;
+ IntrinsicInst *Log2;
+ detectLog2OfHalf(Op0, OpY, Log2);
+ if (OpY) {
+ OpX = Op1;
+ } else {
+ detectLog2OfHalf(Op1, OpY, Log2);
+ if (OpY) {
+ OpX = Op0;
+ }
+ }
+ // if pattern detected emit alternate sequence
+ if (OpX && OpY) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->setFastMathFlags(Log2->getFastMathFlags());
+ Log2->setArgOperand(0, OpY);
+ Value *FMulVal = Builder->CreateFMul(OpX, Log2);
+ Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+ FSub->takeName(&I);
+ return replaceInstUsesWith(I, FSub);
+ }
+ }
+
+ // Handle symmetric situation in a 2-iteration loop
+ Value *Opnd0 = Op0;
+ Value *Opnd1 = Op1;
+ for (int i = 0; i < 2; i++) {
+ bool IgnoreZeroSign = I.hasNoSignedZeros();
+ if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->setFastMathFlags(I.getFastMathFlags());
+
+ Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
+ Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
+
+ // -X * -Y => X*Y
+ if (N1) {
+ Value *FMul = Builder->CreateFMul(N0, N1);
+ FMul->takeName(&I);
+ return replaceInstUsesWith(I, FMul);
+ }
+
+ if (Opnd0->hasOneUse()) {
+ // -X * Y => -(X*Y) (Promote negation as high as possible)
+ Value *T = Builder->CreateFMul(N0, Opnd1);
+ Value *Neg = Builder->CreateFNeg(T);
+ Neg->takeName(&I);
+ return replaceInstUsesWith(I, Neg);
+ }
+ }
+
+ // (X*Y) * X => (X*X) * Y where Y != X
+ // The purpose is two-fold:
+ // 1) to form a power expression (of X).
+ // 2) potentially shorten the critical path: After transformation, the
+ // latency of the instruction Y is amortized by the expression of X*X,
+ // and therefore Y is in a "less critical" position compared to what it
+ // was before the transformation.
+ //
+ if (AllowReassociate) {
+ Value *Opnd0_0, *Opnd0_1;
+ if (Opnd0->hasOneUse() &&
+ match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
+ Value *Y = nullptr;
+ if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
+ Y = Opnd0_1;
+ else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
+ Y = Opnd0_0;
+
+ if (Y) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->setFastMathFlags(I.getFastMathFlags());
+ Value *T = Builder->CreateFMul(Opnd1, Opnd1);
+
+ Value *R = Builder->CreateFMul(T, Y);
+ R->takeName(&I);
+ return replaceInstUsesWith(I, R);
+ }
+ }
+ }
+
+ if (!isa<Constant>(Op1))
+ std::swap(Opnd0, Opnd1);
+ else
+ break;
+ }
+
+ return Changed ? &I : nullptr;
+}
+
+/// Try to fold a divide or remainder of a select instruction.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select, or the condition of the select may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and condition only have a single use, don't bother with this,
+ // early exit.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(&*BBI);
+ } else if (*I == SelectCond) {
+ *I = Builder->getInt1(NonNullOperand == 1);
+ Worklist.Add(&*BBI);
+ }
+ }
+
+ // If we past the instruction, quit looking for it.
+ if (&*BBI == SI)
+ SI = nullptr;
+ if (&*BBI == SelectCond)
+ SelectCond = nullptr;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (!SelectCond && !SI)
+ break;
+
+ }
+ return true;
+}
+
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0)) {
+ const APInt *C2;
+ if (match(Op1, m_APInt(C2))) {
+ Value *X;
+ const APInt *C1;
+ bool IsSigned = I.getOpcode() == Instruction::SDiv;
+
+ // (X / C1) / C2 -> X / (C1*C2)
+ if ((IsSigned && match(LHS, m_SDiv(m_Value(X), m_APInt(C1)))) ||
+ (!IsSigned && match(LHS, m_UDiv(m_Value(X), m_APInt(C1))))) {
+ APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ if (!MultiplyOverflows(*C1, *C2, Product, IsSigned))
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(I.getType(), Product));
+ }
+
+ if ((IsSigned && match(LHS, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
+ (!IsSigned && match(LHS, m_NUWMul(m_Value(X), m_APInt(C1))))) {
+ APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+
+ // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
+ if (IsMultiple(*C2, *C1, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ I.getOpcode(), X, ConstantInt::get(X->getType(), Quotient));
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
+
+ // (X * C1) / C2 -> X * (C1 / C2) if C1 is a multiple of C2.
+ if (IsMultiple(*C1, *C2, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ Instruction::Mul, X, ConstantInt::get(X->getType(), Quotient));
+ BO->setHasNoUnsignedWrap(
+ !IsSigned &&
+ cast<OverflowingBinaryOperator>(LHS)->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(
+ cast<OverflowingBinaryOperator>(LHS)->hasNoSignedWrap());
+ return BO;
+ }
+ }
+
+ if ((IsSigned && match(LHS, m_NSWShl(m_Value(X), m_APInt(C1))) &&
+ *C1 != C1->getBitWidth() - 1) ||
+ (!IsSigned && match(LHS, m_NUWShl(m_Value(X), m_APInt(C1))))) {
+ APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt C1Shifted = APInt::getOneBitSet(
+ C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
+
+ // (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of C1.
+ if (IsMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ I.getOpcode(), X, ConstantInt::get(X->getType(), Quotient));
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
+
+ // (X << C1) / C2 -> X * (C2 >> C1) if C1 is a multiple of C2.
+ if (IsMultiple(C1Shifted, *C2, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ Instruction::Mul, X, ConstantInt::get(X->getType(), Quotient));
+ BO->setHasNoUnsignedWrap(
+ !IsSigned &&
+ cast<OverflowingBinaryOperator>(LHS)->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(
+ cast<OverflowingBinaryOperator>(LHS)->hasNoSignedWrap());
+ return BO;
+ }
+ }
+
+ if (*C2 != 0) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+ }
+
+ if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) {
+ if (One->isOne() && !I.getType()->isIntegerTy(1)) {
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if (isSigned) {
+ // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
+ // result is one, if Op1 is -1 then the result is minus one, otherwise
+ // it's zero.
+ Value *Inc = Builder->CreateAdd(Op1, One);
+ Value *Cmp = Builder->CreateICmpULT(
+ Inc, ConstantInt::get(I.getType(), 3));
+ return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
+ } else {
+ // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
+ // result is one, otherwise it's zero.
+ return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType());
+ }
+ }
+ }
+
+ // See if we can fold away this div instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+ Value *X = nullptr, *Z = nullptr;
+ if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+ (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+ return BinaryOperator::Create(I.getOpcode(), X, Op1);
+ }
+
+ return nullptr;
+}
+
+/// dyn_castZExtVal - Checks if V is a zext or constant that can
+/// be truncated to Ty without losing bits.
+static Value *dyn_castZExtVal(Value *V, Type *Ty) {
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
+ if (Z->getSrcTy() == Ty)
+ return Z->getOperand(0);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
+ if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
+ return ConstantExpr::getTrunc(C, Ty);
+ }
+ return nullptr;
+}
+
+namespace {
+const unsigned MaxDepth = 6;
+typedef Instruction *(*FoldUDivOperandCb)(Value *Op0, Value *Op1,
+ const BinaryOperator &I,
+ InstCombiner &IC);
+
+/// \brief Used to maintain state for visitUDivOperand().
+struct UDivFoldAction {
+ FoldUDivOperandCb FoldAction; ///< Informs visitUDiv() how to fold this
+ ///< operand. This can be zero if this action
+ ///< joins two actions together.
+
+ Value *OperandToFold; ///< Which operand to fold.
+ union {
+ Instruction *FoldResult; ///< The instruction returned when FoldAction is
+ ///< invoked.
+
+ size_t SelectLHSIdx; ///< Stores the LHS action index if this action
+ ///< joins two actions together.
+ };
+
+ UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand)
+ : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {}
+ UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
+ : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
+};
+}
+
+// X udiv 2^C -> X >> C
+static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
+ const BinaryOperator &I, InstCombiner &IC) {
+ const APInt &C = cast<Constant>(Op1)->getUniqueInteger();
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(
+ Op0, ConstantInt::get(Op0->getType(), C.logBase2()));
+ if (I.isExact())
+ LShr->setIsExact();
+ return LShr;
+}
+
+// X udiv C, where C >= signbit
+static Instruction *foldUDivNegCst(Value *Op0, Value *Op1,
+ const BinaryOperator &I, InstCombiner &IC) {
+ Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1));
+
+ return SelectInst::Create(ICI, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+}
+
+// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
+ InstCombiner &IC) {
+ Instruction *ShiftLeft = cast<Instruction>(Op1);
+ if (isa<ZExtInst>(ShiftLeft))
+ ShiftLeft = cast<Instruction>(ShiftLeft->getOperand(0));
+
+ const APInt &CI =
+ cast<Constant>(ShiftLeft->getOperand(0))->getUniqueInteger();
+ Value *N = ShiftLeft->getOperand(1);
+ if (CI != 1)
+ N = IC.Builder->CreateAdd(N, ConstantInt::get(N->getType(), CI.logBase2()));
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
+ N = IC.Builder->CreateZExt(N, Z->getDestTy());
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
+ if (I.isExact())
+ LShr->setIsExact();
+ return LShr;
+}
+
+// \brief Recursively visits the possible right hand operands of a udiv
+// instruction, seeing through select instructions, to determine if we can
+// replace the udiv with something simpler. If we find that an operand is not
+// able to simplify the udiv, we abort the entire transformation.
+static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
+ SmallVectorImpl<UDivFoldAction> &Actions,
+ unsigned Depth = 0) {
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ if (match(Op1, m_Power2())) {
+ Actions.push_back(UDivFoldAction(foldUDivPow2Cst, Op1));
+ return Actions.size();
+ }
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1))
+ // X udiv C, where C >= signbit
+ if (C->getValue().isNegative()) {
+ Actions.push_back(UDivFoldAction(foldUDivNegCst, C));
+ return Actions.size();
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ if (match(Op1, m_Shl(m_Power2(), m_Value())) ||
+ match(Op1, m_ZExt(m_Shl(m_Power2(), m_Value())))) {
+ Actions.push_back(UDivFoldAction(foldUDivShl, Op1));
+ return Actions.size();
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return 0;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (size_t LHSIdx =
+ visitUDivOperand(Op0, SI->getOperand(1), I, Actions, Depth))
+ if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions, Depth)) {
+ Actions.push_back(UDivFoldAction(nullptr, Op1, LHSIdx - 1));
+ return Actions.size();
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyUDivInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+ {
+ Value *X;
+ const APInt *C1, *C2;
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(C1))) &&
+ match(Op1, m_APInt(C2))) {
+ bool Overflow;
+ APInt C2ShlC1 = C2->ushl_ov(*C1, Overflow);
+ if (!Overflow) {
+ bool IsExact = I.isExact() && match(Op0, m_Exact(m_Value()));
+ BinaryOperator *BO = BinaryOperator::CreateUDiv(
+ X, ConstantInt::get(X->getType(), C2ShlC1));
+ if (IsExact)
+ BO->setIsExact();
+ return BO;
+ }
+ }
+ }
+
+ // (zext A) udiv (zext B) --> zext (A udiv B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(
+ Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
+ I.getType());
+
+ // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
+ SmallVector<UDivFoldAction, 6> UDivActions;
+ if (visitUDivOperand(Op0, Op1, I, UDivActions))
+ for (unsigned i = 0, e = UDivActions.size(); i != e; ++i) {
+ FoldUDivOperandCb Action = UDivActions[i].FoldAction;
+ Value *ActionOp1 = UDivActions[i].OperandToFold;
+ Instruction *Inst;
+ if (Action)
+ Inst = Action(Op0, ActionOp1, I, *this);
+ else {
+ // This action joins two actions together. The RHS of this action is
+ // simply the last action we processed, we saved the LHS action index in
+ // the joining action.
+ size_t SelectRHSIdx = i - 1;
+ Value *SelectRHS = UDivActions[SelectRHSIdx].FoldResult;
+ size_t SelectLHSIdx = UDivActions[i].SelectLHSIdx;
+ Value *SelectLHS = UDivActions[SelectLHSIdx].FoldResult;
+ Inst = SelectInst::Create(cast<SelectInst>(ActionOp1)->getCondition(),
+ SelectLHS, SelectRHS);
+ }
+
+ // If this is the last action to process, return it to the InstCombiner.
+ // Otherwise, we insert it before the UDiv and record it so that we may
+ // use it as part of a joining action (i.e., a SelectInst).
+ if (e - i != 1) {
+ Inst->insertBefore(&I);
+ UDivActions[i].FoldResult = Inst;
+ } else
+ return Inst;
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifySDivInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ const APInt *Op1C;
+ if (match(Op1, m_APInt(Op1C))) {
+ // sdiv X, -1 == -X
+ if (Op1C->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv exact X, C --> ashr exact X, log2(C)
+ if (I.isExact() && Op1C->isNonNegative() && Op1C->isPowerOf2()) {
+ Value *ShAmt = ConstantInt::get(Op1->getType(), Op1C->exactLogBase2());
+ return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
+ }
+
+ // If the dividend is sign-extended and the constant divisor is small enough
+ // to fit in the source type, shrink the division to the narrower type:
+ // (sext X) sdiv C --> sext (X sdiv C)
+ Value *Op0Src;
+ if (match(Op0, m_OneUse(m_SExt(m_Value(Op0Src)))) &&
+ Op0Src->getType()->getScalarSizeInBits() >= Op1C->getMinSignedBits()) {
+
+ // In the general case, we need to make sure that the dividend is not the
+ // minimum signed value because dividing that by -1 is UB. But here, we
+ // know that the -1 divisor case is already handled above.
+
+ Constant *NarrowDivisor =
+ ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
+ Value *NarrowOp = Builder->CreateSDiv(Op0Src, NarrowDivisor);
+ return new SExtInst(NarrowOp, Op0->getType());
+ }
+ }
+
+ if (Constant *RHS = dyn_cast<Constant>(Op1)) {
+ // X/INT_MIN -> X == INT_MIN
+ if (RHS->isMinSignedValue())
+ return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType());
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ Value *X;
+ if (match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
+ auto *BO = BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(RHS));
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
+
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
+/// FP value and:
+/// 1) 1/C is exact, or
+/// 2) reciprocal is allowed.
+/// If the conversion was successful, the simplified expression "X * 1/C" is
+/// returned; otherwise, NULL is returned.
+///
+static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Constant *Divisor,
+ bool AllowReciprocal) {
+ if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors.
+ return nullptr;
+
+ const APFloat &FpVal = cast<ConstantFP>(Divisor)->getValueAPF();
+ APFloat Reciprocal(FpVal.getSemantics());
+ bool Cvt = FpVal.getExactInverse(&Reciprocal);
+
+ if (!Cvt && AllowReciprocal && FpVal.isFiniteNonZero()) {
+ Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
+ (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
+ Cvt = !Reciprocal.isDenormal();
+ }
+
+ if (!Cvt)
+ return nullptr;
+
+ ConstantFP *R;
+ R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Dividend, R);
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+ bool AllowReciprocal = I.hasAllowReciprocal();
+
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (AllowReassociate) {
+ Constant *C1 = nullptr;
+ Constant *C2 = Op1C;
+ Value *X;
+ Instruction *Res = nullptr;
+
+ if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) {
+ // (X*C1)/C2 => X * (C1/C2)
+ //
+ Constant *C = ConstantExpr::getFDiv(C1, C2);
+ if (isNormalFp(C))
+ Res = BinaryOperator::CreateFMul(X, C);
+ } else if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
+ // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
+ //
+ Constant *C = ConstantExpr::getFMul(C1, C2);
+ if (isNormalFp(C)) {
+ Res = CvtFDivConstToReciprocal(X, C, AllowReciprocal);
+ if (!Res)
+ Res = BinaryOperator::CreateFDiv(X, C);
+ }
+ }
+
+ if (Res) {
+ Res->setFastMathFlags(I.getFastMathFlags());
+ return Res;
+ }
+ }
+
+ // X / C => X * 1/C
+ if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal)) {
+ T->copyFastMathFlags(&I);
+ return T;
+ }
+
+ return nullptr;
+ }
+
+ if (AllowReassociate && isa<Constant>(Op0)) {
+ Constant *C1 = cast<Constant>(Op0), *C2;
+ Constant *Fold = nullptr;
+ Value *X;
+ bool CreateDiv = true;
+
+ // C1 / (X*C2) => (C1/C2) / X
+ if (match(Op1, m_FMul(m_Value(X), m_Constant(C2))))
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ else if (match(Op1, m_FDiv(m_Value(X), m_Constant(C2)))) {
+ // C1 / (X/C2) => (C1*C2) / X
+ Fold = ConstantExpr::getFMul(C1, C2);
+ } else if (match(Op1, m_FDiv(m_Constant(C2), m_Value(X)))) {
+ // C1 / (C2/X) => (C1/C2) * X
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ CreateDiv = false;
+ }
+
+ if (Fold && isNormalFp(Fold)) {
+ Instruction *R = CreateDiv ? BinaryOperator::CreateFDiv(Fold, X)
+ : BinaryOperator::CreateFMul(X, Fold);
+ R->setFastMathFlags(I.getFastMathFlags());
+ return R;
+ }
+ return nullptr;
+ }
+
+ if (AllowReassociate) {
+ Value *X, *Y;
+ Value *NewInst = nullptr;
+ Instruction *SimpR = nullptr;
+
+ if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // (X/Y) / Z => X / (Y*Z)
+ //
+ if (!isa<Constant>(Y) || !isa<Constant>(Op1)) {
+ NewInst = Builder->CreateFMul(Y, Op1);
+ if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
+ FastMathFlags Flags = I.getFastMathFlags();
+ Flags &= cast<Instruction>(Op0)->getFastMathFlags();
+ RI->setFastMathFlags(Flags);
+ }
+ SimpR = BinaryOperator::CreateFDiv(X, NewInst);
+ }
+ } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // Z / (X/Y) => Z*Y / X
+ //
+ if (!isa<Constant>(Y) || !isa<Constant>(Op0)) {
+ NewInst = Builder->CreateFMul(Op0, Y);
+ if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
+ FastMathFlags Flags = I.getFastMathFlags();
+ Flags &= cast<Instruction>(Op1)->getFastMathFlags();
+ RI->setFastMathFlags(Flags);
+ }
+ SimpR = BinaryOperator::CreateFDiv(NewInst, X);
+ }
+ }
+
+ if (NewInst) {
+ if (Instruction *T = dyn_cast<Instruction>(NewInst))
+ T->setDebugLoc(I.getDebugLoc());
+ SimpR->setFastMathFlags(I.getFastMathFlags());
+ return SimpR;
+ }
+ }
+
+ return nullptr;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (isa<Constant>(Op1)) {
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ using namespace llvm::PatternMatch;
+ const APInt *Op1Int;
+ if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() &&
+ (I.getOpcode() == Instruction::URem ||
+ !Op1Int->isMinSignedValue())) {
+ // FoldOpIntoPhi will speculate instructions to the end of the PHI's
+ // predecessor blocks, so do this only if we know the srem or urem
+ // will not fault.
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyURemInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ // (zext A) urem (zext B) --> zext (A urem B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ I.getType());
+
+ // X urem Y -> X and Y-1, where Y is a power of 2,
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1);
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+
+ // 1 urem X -> zext(X != 1)
+ if (match(Op0, m_One())) {
+ Value *Cmp = Builder->CreateICmpNE(Op1, Op0);
+ Value *Ext = Builder->CreateZExt(Cmp, I.getType());
+ return replaceInstUsesWith(I, Ext);
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifySRemInst(Op0, Op1, DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // Handle the integer rem common cases
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
+
+ {
+ const APInt *Y;
+ // X % -Y -> X % Y
+ if (match(Op1, m_APInt(Y)) && Y->isNegative() && !Y->isMinSignedValue()) {
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, ConstantInt::get(I.getType(), -*Y));
+ return &I;
+ }
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
+ MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
+ Constant *C = cast<Constant>(Op1);
+ unsigned VWidth = C->getType()->getVectorNumElements();
+
+ bool hasNegative = false;
+ bool hasMissing = false;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = C->getAggregateElement(i);
+ if (!Elt) {
+ hasMissing = true;
+ break;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
+ if (RHS->isNegative())
+ hasNegative = true;
+ }
+
+ if (hasNegative && !hasMissing) {
+ SmallVector<Constant *, 16> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Elts[i] = C->getAggregateElement(i); // Handle undef, etc.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
+ if (RHS->isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != C) { // Don't loop on -MININT
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
new file mode 100644
index 000000000000..79a4912332ff
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -0,0 +1,1002 @@
+//===- InstCombinePHI.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitPHINode function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+/// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the
+/// adds all have a single use, turn this into a phi and a single binop.
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ Type *LHSType = LHSVal->getType();
+ Type *RHSType = RHSVal->getType();
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ // Verify type of the LHS matches so we don't fold cmp's of different
+ // types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return nullptr;
+
+ // If they are CmpInst instructions, check their predicates
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
+ return nullptr;
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = nullptr;
+ if (I->getOperand(1) != RHSVal) RHSVal = nullptr;
+ }
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return nullptr;
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = nullptr, *NewRHS = nullptr;
+ if (!LHSVal) {
+ NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (!RHSVal) {
+ NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+ BinaryOperator *NewBinOp =
+ BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+
+ NewBinOp->copyIRFlags(PN.getIncomingValue(0));
+
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
+ NewBinOp->andIRFlags(PN.getIncomingValue(i));
+
+ NewBinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return NewBinOp;
+}
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
+
+ bool AllInBounds = true;
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return nullptr;
+
+ AllInBounds &= GEP->isInBounds();
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+ // Don't merge two GEPs when two operands differ (introducing phi nodes)
+ // if one of the PHIs has a constant for the index. The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return nullptr;
+
+ if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ return nullptr;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return nullptr;
+
+ FixedOperands[op] = nullptr; // Needs a PHI.
+ NeededPhi = true;
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return nullptr;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ GetElementPtrInst *NewGEP =
+ GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+ makeArrayRef(FixedOperands).slice(1));
+ if (AllInBounds) NewGEP->setIsInBounds();
+ NewGEP->setDebugLoc(FirstInst->getDebugLoc());
+ return NewGEP;
+}
+
+
+/// Return true if we know that it is safe to sink the load out of the block
+/// that defines it. This means that it must be obvious the value of the load is
+/// not changed from the point of the load to the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca. Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ // Check for non-address taken alloca. If not address-taken already, it isn't
+ // profitable to do this xform.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (User *U : AI->users()) {
+ if (isa<LoadInst>(U)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // FIXME: This is overconservative; this transform is allowed in some cases
+ // for atomic operations.
+ if (FirstLI->isAtomic())
+ return nullptr;
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+ unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return nullptr;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return nullptr;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return nullptr;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ LI->getPointerAddressSpace() != LoadAddrSpace ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return nullptr;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return nullptr;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return nullptr;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+ LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment);
+
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ };
+
+ for (unsigned ID : KnownIDs)
+ NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
+
+ // Add all operands to the new PHI and combine TBAA metadata.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
+ combineMetadata(NewLI, LI, KnownIDs);
+ Value *NewInVal = LI->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = nullptr;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ NewLI->setOperand(0, InVal);
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (Value *IncValue : PN.incoming_values())
+ cast<LoadInst>(IncValue)->setVolatile(false);
+
+ NewLI->setDebugLoc(FirstLI->getDebugLoc());
+ return NewLI;
+}
+
+/// TODO: This function could handle other cast types, but then it might
+/// require special-casing a cast from the 'i1' type. See the comment in
+/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types.
+Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
+ // We cannot create a new instruction after the PHI if the terminator is an
+ // EHPad because there is no valid insertion point.
+ if (TerminatorInst *TI = Phi.getParent()->getTerminator())
+ if (TI->isEHPad())
+ return nullptr;
+
+ // Early exit for the common case of a phi with two operands. These are
+ // handled elsewhere. See the comment below where we check the count of zexts
+ // and constants for more details.
+ unsigned NumIncomingValues = Phi.getNumIncomingValues();
+ if (NumIncomingValues < 3)
+ return nullptr;
+
+ // Find the narrower type specified by the first zext.
+ Type *NarrowType = nullptr;
+ for (Value *V : Phi.incoming_values()) {
+ if (auto *Zext = dyn_cast<ZExtInst>(V)) {
+ NarrowType = Zext->getSrcTy();
+ break;
+ }
+ }
+ if (!NarrowType)
+ return nullptr;
+
+ // Walk the phi operands checking that we only have zexts or constants that
+ // we can shrink for free. Store the new operands for the new phi.
+ SmallVector<Value *, 4> NewIncoming;
+ unsigned NumZexts = 0;
+ unsigned NumConsts = 0;
+ for (Value *V : Phi.incoming_values()) {
+ if (auto *Zext = dyn_cast<ZExtInst>(V)) {
+ // All zexts must be identical and have one use.
+ if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse())
+ return nullptr;
+ NewIncoming.push_back(Zext->getOperand(0));
+ NumZexts++;
+ } else if (auto *C = dyn_cast<Constant>(V)) {
+ // Make sure that constants can fit in the new type.
+ Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType);
+ if (ConstantExpr::getZExt(Trunc, C->getType()) != C)
+ return nullptr;
+ NewIncoming.push_back(Trunc);
+ NumConsts++;
+ } else {
+ // If it's not a cast or a constant, bail out.
+ return nullptr;
+ }
+ }
+
+ // The more common cases of a phi with no constant operands or just one
+ // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi()
+ // respectively. FoldOpIntoPhi() wants to do the opposite transform that is
+ // performed here. It tries to replicate a cast in the phi operand's basic
+ // block to expose other folding opportunities. Thus, InstCombine will
+ // infinite loop without this check.
+ if (NumConsts == 0 || NumZexts < 2)
+ return nullptr;
+
+ // All incoming values are zexts or constants that are safe to truncate.
+ // Create a new phi node of the narrow type, phi together all of the new
+ // operands, and zext the result back to the original type.
+ PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,
+ Phi.getName() + ".shrunk");
+ for (unsigned i = 0; i != NumIncomingValues; ++i)
+ NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i));
+
+ InsertNewInstBefore(NewPhi, Phi);
+ return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
+}
+
+/// If all operands to a PHI node are the same "unary" operator and they all are
+/// only used by the PHI, PHI together their inputs, and do the operation once,
+/// to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ // We cannot create a new instruction after the PHI if the terminator is an
+ // EHPad because there is no valid insertion point.
+ if (TerminatorInst *TI = PN.getParent()->getTerminator())
+ if (TI->isEHPad())
+ return nullptr;
+
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = nullptr;
+ Type *CastSrcTy = nullptr;
+
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ return nullptr;
+ }
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (!ConstantOp)
+ return FoldPHIArgBinOpIntoPHI(PN);
+ } else {
+ return nullptr; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return nullptr;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return nullptr; // Cast operation must match.
+ } else if (I->getOperand(1) != ConstantOp) {
+ return nullptr;
+ }
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = nullptr;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
+ CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,
+ PN.getType());
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+ BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ BinOp->copyIRFlags(PN.getIncomingValue(0));
+
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
+ BinOp->andIRFlags(PN.getIncomingValue(i));
+
+ BinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return BinOp;
+ }
+
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+}
+
+/// Return true if this PHI node is only used by a PHI node cycle that is dead.
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN).second)
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// Return true if this phi node is always equal to NonPhiInVal.
+/// This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSetImpl<PHINode*> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN).second)
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (Value *Op : PN->incoming_values()) {
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+/// Return an existing non-zero constant if this phi node has one, otherwise
+/// return constant 1.
+static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
+ assert(isa<IntegerType>(PN.getType()) && "Expect only intger type phi");
+ for (Value *V : PN.operands())
+ if (auto *ConstVA = dyn_cast<ConstantInt>(V))
+ if (!ConstVA->isZeroValue())
+ return ConstVA;
+ return ConstantInt::get(cast<IntegerType>(PN.getType()), 1);
+}
+
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something determinstic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(nullptr, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(nullptr, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+}
+
+
+/// This is an integer PHI and we know that it has an illegal type: see if it is
+/// only used by trunc or trunc(lshr) operations. If so, we split the PHI into
+/// the various pieces being extracted. This sort of thing is introduced when
+/// SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be an bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is a set we use to avoid
+ // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
+ // check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be split the critical
+ // edge which is required to insert a truncate. Because of this, we have to
+ // bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (!II) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // If we have a phi, and if it's directly in the predecessor, then we have
+ // a critical edge where we need to put the truncate. Since we can't
+ // split the edge in instcombine, we have to bail out.
+ return nullptr;
+ }
+
+ for (User *U : PN->users()) {
+ Instruction *UserI = cast<Instruction>(U);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(UserI)) {
+ if (PHIsInspected.insert(UserPN).second)
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(UserI)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, UserI));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (UserI->getOpcode() != Instruction::LShr ||
+ !UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) ||
+ !isa<ConstantInt>(UserI->getOperand(1)))
+ return nullptr;
+
+ unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
+ }
+ }
+
+ // If we have no users, they must be all self uses, just nuke the PHI.
+ if (PHIUsers.empty())
+ return replaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) {
+
+ // Otherwise, Create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
+ PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote it, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ replaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ replaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return replaceInstUsesWith(FirstPhi, Undef);
+}
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AC))
+ return replaceInstUsesWith(PN, V);
+
+ if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
+ return Result;
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ // FIXME: The hasOneUse check will fail for PHIs that use the value more
+ // than themselves more than once.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.user_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return replaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->user_back() == &PN) {
+ return replaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ // When a PHI is used only to be compared with zero, it is safe to replace
+ // an incoming value proved as known nonzero with any non-zero constant.
+ // For example, in the code below, the incoming value %v can be replaced
+ // with any non-zero constant based on the fact that the PHI is only used to
+ // be compared with zero and %v is a known non-zero value:
+ // %v = select %cond, 1, 2
+ // %p = phi [%v, BB] ...
+ // icmp eq, %p, 0
+ auto *CmpInst = dyn_cast<ICmpInst>(PHIUser);
+ // FIXME: To be simple, handle only integer type for now.
+ if (CmpInst && isa<IntegerType>(PN.getType()) && CmpInst->isEquality() &&
+ match(CmpInst->getOperand(1), m_Zero())) {
+ ConstantInt *NonZeroConst = nullptr;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator();
+ Value *VA = PN.getIncomingValue(i);
+ if (isKnownNonZero(VA, DL, 0, AC, CtxI, DT)) {
+ if (!NonZeroConst)
+ NonZeroConst = GetAnyNonZeroConstInt(PN);
+ PN.setIncomingValue(i, NonZeroConst);
+ }
+ }
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+ // quick check to see if the PHI node only contains a single non-phi value, if
+ // so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumIncomingVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumIncomingVals) {
+ Value *NonPhiInVal = PN.getIncomingValue(InValNo);
+
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumIncomingVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return replaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (PN.getType()->isIntegerTy() &&
+ !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 000000000000..8f1ff8ac0e66
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,1270 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+static SelectPatternFlavor
+getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return SPF_SMAX;
+ case SPF_UMIN:
+ return SPF_UMAX;
+ case SPF_SMAX:
+ return SPF_SMIN;
+ case SPF_UMAX:
+ return SPF_UMIN;
+ }
+}
+
+static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF,
+ bool Ordered=false) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return ICmpInst::ICMP_SLT;
+ case SPF_UMIN:
+ return ICmpInst::ICMP_ULT;
+ case SPF_SMAX:
+ return ICmpInst::ICMP_SGT;
+ case SPF_UMAX:
+ return ICmpInst::ICMP_UGT;
+ case SPF_FMINNUM:
+ return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
+ case SPF_FMAXNUM:
+ return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
+ }
+}
+
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+ SelectPatternFlavor SPF, Value *A,
+ Value *B) {
+ CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF);
+ assert(CmpInst::isIntPredicate(Pred));
+ return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+}
+
+/// We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// For the same transformation as the previous function, return the identity
+/// constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("This cannot happen!");
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
+
+/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ // If this is a cast from the same type, merge.
+ if (TI->getNumOperands() == 1 && TI->isCast()) {
+ Type *FIOpndTy = FI->getOperand(0)->getType();
+ if (TI->getOperand(0)->getType() != FIOpndTy)
+ return nullptr;
+
+ // The select condition may be a vector. We may only change the operand
+ // type if the vector width remains the same (and matches the condition).
+ Type *CondTy = SI.getCondition()->getType();
+ if (CondTy->isVectorTy()) {
+ if (!FIOpndTy->isVectorTy())
+ return nullptr;
+ if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())
+ return nullptr;
+
+ // TODO: If the backend knew how to deal with casts better, we could
+ // remove this limitation. For now, there's too much potential to create
+ // worse codegen by promoting the select ahead of size-altering casts
+ // (PR28160).
+ //
+ // Note that ValueTracking's matchSelectPattern() looks through casts
+ // without checking 'hasOneUse' when it matches min/max patterns, so this
+ // transform may end up happening anyway.
+ if (TI->getOpcode() != Instruction::BitCast &&
+ (!TI->hasOneUse() || !FI->hasOneUse()))
+ return nullptr;
+
+ } else if (!TI->hasOneUse() || !FI->hasOneUse()) {
+ // TODO: The one-use restrictions for a scalar select could be eased if
+ // the fold of a select in visitLoadInst() was enhanced to match a pattern
+ // that includes a cast.
+ return nullptr;
+ }
+
+ // Fold this by inserting a select from the input values.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // TODO: This function ends awkwardly in unreachable - fix to be more normal.
+
+ // Only handle binary operators with one-use here. As with the cast case
+ // above, it may be possible to relax the one-use constraint, but that needs
+ // be examined carefully since it may not reduce the total number of
+ // instructions.
+ if (!isa<BinaryOperator>(TI) || !TI->hasOneUse() || !FI->hasOneUse())
+ return nullptr;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return nullptr;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return nullptr;
+ }
+
+ // If we reach here, they do have operations in common.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+ return false;
+ return C1I->isOne() || C1I->isAllOnesValue() ||
+ C2I->isOne() || C2I->isAllOnesValue();
+}
+
+/// Try to fold the select into one of the operands to allow further
+/// optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+ NewSel->takeName(TVI);
+ BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
+ BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
+ FalseVal, NewSel);
+ BO->copyIRFlags(TVI_BO);
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+ NewSel->takeName(FVI);
+ BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
+ BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
+ TrueVal, NewSel);
+ BO->copyIRFlags(FVI_BO);
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/// We want to turn:
+/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// into:
+/// (or (shl (and X, C1), C3), y)
+/// iff:
+/// C1 and C2 are both powers of 2
+/// where:
+/// C3 = Log(C2) - Log(C1)
+///
+/// This transform handles cases where:
+/// 1. The icmp predicate is inverted
+/// 2. The select operands are reversed
+/// 3. The magnitude of C2 and C1 are flipped
+static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
+ return nullptr;
+
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+
+ if (!match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ Value *X;
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ return nullptr;
+
+ const APInt *C2;
+ bool OrOnTrueVal = false;
+ bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
+ if (!OrOnFalseVal)
+ OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
+
+ if (!OrOnFalseVal && !OrOnTrueVal)
+ return nullptr;
+
+ Value *V = CmpLHS;
+ Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+
+ unsigned C1Log = C1->logBase2();
+ unsigned C2Log = C2->logBase2();
+ if (C2Log > C1Log) {
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder->CreateShl(V, C2Log - C1Log);
+ } else if (C1Log > C2Log) {
+ V = Builder->CreateLShr(V, C1Log - C2Log);
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ } else
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+ (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ V = Builder->CreateXor(V, *C2);
+
+ return Builder->CreateOr(V, Y);
+}
+
+/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
+/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
+///
+/// For example, we can fold the following code sequence:
+/// \code
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+/// %1 = icmp ne i32 %x, 0
+/// %2 = select i1 %1, i32 %0, i32 32
+/// \code
+///
+/// into:
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+
+ // Check if the condition value compares a value for equality against zero.
+ if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ Value *Count = FalseVal;
+ Value *ValueOnZero = TrueVal;
+ if (Pred == ICmpInst::ICMP_NE)
+ std::swap(Count, ValueOnZero);
+
+ // Skip zero extend/truncate.
+ Value *V = nullptr;
+ if (match(Count, m_ZExt(m_Value(V))) ||
+ match(Count, m_Trunc(m_Value(V))))
+ Count = V;
+
+ // Check if the value propagated on zero is a constant number equal to the
+ // sizeof in bits of 'Count'.
+ unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
+ if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
+ return nullptr;
+
+ // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+ // input to the cttz/ctlz is used as LHS for the compare instruction.
+ if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
+ match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ IRBuilder<> Builder(II);
+ // Explicitly clear the 'undef_on_zero' flag.
+ IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
+ Type *Ty = NewI->getArgOperand(1)->getType();
+ NewI->setArgOperand(1, Constant::getNullValue(Ty));
+ Builder.Insert(NewI);
+ return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+ }
+
+ return nullptr;
+}
+
+/// Visit a SelectInst that has an ICmpInst as its first operand.
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may move or edit ICI
+ // here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // These transformations only work for selects over integers.
+ IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ if (!SelectTy)
+ break;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+ else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+ ; // Nothing to do here. Values match without any sign/zero extension.
+
+ // Types do not match. Instead of calculating this with mixed types
+ // promote all to the larger type. This enables scalar evolution to
+ // analyze this expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits()
+ < SelectTy->getBitWidth()) {
+ Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = sextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = sextRHS;
+ } else if (ICI->isUnsigned()) {
+ Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = zextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = zextRHS;
+ } else
+ break;
+ } else
+ break;
+ } else
+ break;
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(0, CmpLHS);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+
+ // Move ICI instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the ICI instruction uses it.
+ ICI->moveBefore(&SI);
+
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+ // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+ // FIXME: Type and constness constraints could be lifted, but we have to
+ // watch code size carefully. We should consider xor instead of
+ // sub/add when we decide to do that.
+ if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (TrueVal->getType() == Ty) {
+ if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+ ConstantInt *C1 = nullptr, *C2 = nullptr;
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ C1 = dyn_cast<ConstantInt>(TrueVal);
+ C2 = dyn_cast<ConstantInt>(FalseVal);
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ C1 = dyn_cast<ConstantInt>(FalseVal);
+ C2 = dyn_cast<ConstantInt>(TrueVal);
+ }
+ if (C1 && C2) {
+ // This shift results in either -1 or 0.
+ Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+ // Check if we can express the operation with a single or.
+ if (C2->isAllOnesValue())
+ return replaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+ Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+ return replaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ }
+ }
+ }
+ }
+
+ // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
+ if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
+ // Transform (X == C) ? X : Y -> (X == C) ? C : Y
+ SI.setOperand(1, CmpRHS);
+ Changed = true;
+ } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
+ // Transform (X != C) ? Y : X -> (X != C) ? Y : C
+ SI.setOperand(2, CmpRHS);
+ Changed = true;
+ }
+ }
+
+ // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
+ // decomposeBitTestICmp() might help.
+ {
+ unsigned BitWidth =
+ DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
+ APInt MinSignedValue = APInt::getSignBit(BitWidth);
+ Value *X;
+ const APInt *Y, *C;
+ bool TrueWhenUnset;
+ bool IsBitTest = false;
+ if (ICmpInst::isEquality(Pred) &&
+ match(CmpLHS, m_And(m_Value(X), m_Power2(Y))) &&
+ match(CmpRHS, m_Zero())) {
+ IsBitTest = true;
+ TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
+ } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
+ X = CmpLHS;
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = false;
+ } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
+ X = CmpLHS;
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = true;
+ }
+ if (IsBitTest) {
+ Value *V = nullptr;
+ // (X & Y) == 0 ? X : X ^ Y --> X & ~Y
+ if (TrueWhenUnset && TrueVal == X &&
+ match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
+ V = Builder->CreateAnd(X, ~(*Y));
+ // (X & Y) != 0 ? X ^ Y : X --> X & ~Y
+ else if (!TrueWhenUnset && FalseVal == X &&
+ match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
+ V = Builder->CreateAnd(X, ~(*Y));
+ // (X & Y) == 0 ? X ^ Y : X --> X | Y
+ else if (TrueWhenUnset && FalseVal == X &&
+ match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
+ V = Builder->CreateOr(X, *Y);
+ // (X & Y) != 0 ? X : X ^ Y --> X | Y
+ else if (!TrueWhenUnset && TrueVal == X &&
+ match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
+ V = Builder->CreateOr(X, *Y);
+
+ if (V)
+ return replaceInstUsesWith(SI, V);
+ }
+ }
+
+ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
+ if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
+ return Changed ? &SI : nullptr;
+}
+
+
+/// SI is a select whose condition is a PHI node (but the two may be in
+/// different blocks). See if the true/false values (V) are live in all of the
+/// predecessor blocks of the PHI. For example, cases like this can't be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+ // Otherwise we have a 'hard' case and we can't tell without doing more
+ // detailed dominator based analysis, punt.
+ return false;
+}
+
+/// We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (Outer.getType() != Inner->getType())
+ return nullptr;
+
+ if (C == A || C == B) {
+ // MAX(MAX(A, B), B) -> MAX(A, B)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ if (SPF1 == SPF2)
+ return replaceInstUsesWith(Outer, Inner);
+
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return replaceInstUsesWith(Outer, C);
+ }
+
+ if (SPF1 == SPF2) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(B)) {
+ if (ConstantInt *CC = dyn_cast<ConstantInt>(C)) {
+ const APInt &ACB = CB->getValue();
+ const APInt &ACC = CC->getValue();
+
+ // MIN(MIN(A, 23), 97) -> MIN(A, 23)
+ // MAX(MAX(A, 97), 23) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) ||
+ (SPF1 == SPF_SMIN && ACB.sle(ACC)) ||
+ (SPF1 == SPF_UMAX && ACB.uge(ACC)) ||
+ (SPF1 == SPF_SMAX && ACB.sge(ACC)))
+ return replaceInstUsesWith(Outer, Inner);
+
+ // MIN(MIN(A, 97), 23) -> MIN(A, 23)
+ // MAX(MAX(A, 23), 97) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) ||
+ (SPF1 == SPF_SMIN && ACB.sgt(ACC)) ||
+ (SPF1 == SPF_UMAX && ACB.ult(ACC)) ||
+ (SPF1 == SPF_SMAX && ACB.slt(ACC))) {
+ Outer.replaceUsesOfWith(Inner, A);
+ return &Outer;
+ }
+ }
+ }
+ }
+
+ // ABS(ABS(X)) -> ABS(X)
+ // NABS(NABS(X)) -> NABS(X)
+ if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) {
+ return replaceInstUsesWith(Outer, Inner);
+ }
+
+ // ABS(NABS(X)) -> ABS(X)
+ // NABS(ABS(X)) -> NABS(X)
+ if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) ||
+ (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) {
+ SelectInst *SI = cast<SelectInst>(Inner);
+ Value *NewSI = Builder->CreateSelect(
+ SI->getCondition(), SI->getFalseValue(), SI->getTrueValue());
+ return replaceInstUsesWith(Outer, NewSI);
+ }
+
+ auto IsFreeOrProfitableToInvert =
+ [&](Value *V, Value *&NotV, bool &ElidesXor) {
+ if (match(V, m_Not(m_Value(NotV)))) {
+ // If V has at most 2 uses then we can get rid of the xor operation
+ // entirely.
+ ElidesXor |= !V->hasNUsesOrMore(3);
+ return true;
+ }
+
+ if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) {
+ NotV = nullptr;
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *NotA, *NotB, *NotC;
+ bool ElidesXor = false;
+
+ // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C)
+ // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C)
+ // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C)
+ // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C)
+ //
+ // This transform is performance neutral if we can elide at least one xor from
+ // the set of three operands, since we'll be tacking on an xor at the very
+ // end.
+ if (IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
+ IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
+ IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
+ if (!NotA)
+ NotA = Builder->CreateNot(A);
+ if (!NotB)
+ NotB = Builder->CreateNot(B);
+ if (!NotC)
+ NotC = Builder->CreateNot(C);
+
+ Value *NewInner = generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB);
+ Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC));
+ return replaceInstUsesWith(Outer, NewOuter);
+ }
+
+ return nullptr;
+}
+
+/// If one of the constants is zero (we know they can't both be) and we have an
+/// icmp instruction with zero, and we have an 'and' with the non-constant value
+/// and a power of two we can turn the select into a shift on the result of the
+/// 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+ ConstantInt *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
+ return nullptr;
+
+ if (!match(IC->getOperand(1), m_Zero()))
+ return nullptr;
+
+ ConstantInt *AndRHS;
+ Value *LHS = IC->getOperand(0);
+ if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ return nullptr;
+
+ // If both select arms are non-zero see if we have a select of the form
+ // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+ // for 'x ? 2^n : 0' and fix the thing up at the end.
+ ConstantInt *Offset = nullptr;
+ if (!TrueVal->isZero() && !FalseVal->isZero()) {
+ if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ Offset = FalseVal;
+ else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ Offset = TrueVal;
+ else
+ return nullptr;
+
+ // Adjust TrueVal and FalseVal to the offset.
+ TrueVal = ConstantInt::get(Builder->getContext(),
+ TrueVal->getValue() - Offset->getValue());
+ FalseVal = ConstantInt::get(Builder->getContext(),
+ FalseVal->getValue() - Offset->getValue());
+ }
+
+ // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+ if (!AndRHS->getValue().isPowerOf2() ||
+ (!TrueVal->getValue().isPowerOf2() &&
+ !FalseVal->getValue().isPowerOf2()))
+ return nullptr;
+
+ // Determine which shift is needed to transform result of the 'and' into the
+ // desired result.
+ ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC->getValue().logBase2();
+ unsigned AndZeros = AndRHS->getValue().logBase2();
+
+ // If types don't match we can still convert the select by introducing a zext
+ // or a trunc of the 'and'. The trunc case requires that all of the truncated
+ // bits are zero, we can figure that out by looking at the 'and' mask.
+ if (AndZeros >= ValC->getBitWidth())
+ return nullptr;
+
+ Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
+ if (ValZeros > AndZeros)
+ V = Builder->CreateShl(V, ValZeros - AndZeros);
+ else if (ValZeros < AndZeros)
+ V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+ // Okay, now we know that everything is set up, we just don't know whether we
+ // have a icmp_ne or icmp_eq and whether the true or false val is the zero.
+ bool ShouldNotVal = !TrueVal->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ValC);
+
+ // Apply an offset if needed.
+ if (Offset)
+ V = Builder->CreateAdd(V, Offset);
+ return V;
+}
+
+/// Turn select C, (X + Y), (X - Y) --> (X + (select C, Y, (-Y))).
+/// This is even legal for FP.
+static Instruction *foldAddSubSelect(SelectInst &SI,
+ InstCombiner::BuilderTy &Builder) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+ auto *TI = dyn_cast<Instruction>(TrueVal);
+ auto *FI = dyn_cast<Instruction>(FalseVal);
+ if (!TI || !FI || !TI->hasOneUse() || !FI->hasOneUse())
+ return nullptr;
+
+ Instruction *AddOp = nullptr, *SubOp = nullptr;
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
+ AddOp = FI;
+ SubOp = TI;
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
+ AddOp = TI;
+ SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = nullptr;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (SI.getType()->isFPOrFPVectorTy()) {
+ NegVal = Builder.CreateFNeg(SubOp->getOperand(1));
+ if (Instruction *NegInst = dyn_cast<Instruction>(NegVal)) {
+ FastMathFlags Flags = AddOp->getFastMathFlags();
+ Flags &= SubOp->getFastMathFlags();
+ NegInst->setFastMathFlags(Flags);
+ }
+ } else {
+ NegVal = Builder.CreateNeg(SubOp->getOperand(1));
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Value *NewSel = Builder.CreateSelect(CondVal, NewTrueOp, NewFalseOp,
+ SI.getName() + ".p");
+
+ if (SI.getType()->isFPOrFPVectorTy()) {
+ Instruction *RI =
+ BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+
+ FastMathFlags Flags = AddOp->getFastMathFlags();
+ Flags &= SubOp->getFastMathFlags();
+ RI->setFastMathFlags(Flags);
+ return RI;
+ } else
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+ Type *SelType = SI.getType();
+
+ if (Value *V =
+ SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, TLI, DT, AC))
+ return replaceInstUsesWith(SI, V);
+
+ if (SelType->getScalarType()->isIntegerTy(1) &&
+ TrueVal->getType() == CondVal->getType()) {
+ if (match(TrueVal, m_One())) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ }
+ if (match(TrueVal, m_Zero())) {
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ }
+ if (match(FalseVal, m_Zero())) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+ if (match(FalseVal, m_One())) {
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+
+ // select a, a, b -> a | b
+ // select a, b, a -> a & b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a) & b
+ // select a, b, ~a -> (~a) | b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
+ }
+
+ // Selecting between two integer or vector splat integer constants?
+ //
+ // Note that we don't handle a scalar select of vectors:
+ // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0>
+ // because that may need 3 instructions to splat the condition value:
+ // extend, insertelement, shufflevector.
+ if (CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {
+ // select C, 1, 0 -> zext C to int
+ if (match(TrueVal, m_One()) && match(FalseVal, m_Zero()))
+ return new ZExtInst(CondVal, SelType);
+
+ // select C, -1, 0 -> sext C to int
+ if (match(TrueVal, m_AllOnes()) && match(FalseVal, m_Zero()))
+ return new SExtInst(CondVal, SelType);
+
+ // select C, 0, 1 -> zext !C to int
+ if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ return new ZExtInst(NotCond, SelType);
+ }
+
+ // select C, 0, -1 -> sext !C to int
+ if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+ return new SExtInst(NotCond, SelType);
+ }
+ }
+
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal))
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ return replaceInstUsesWith(SI, V);
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return replaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return replaceInstUsesWith(SI, TrueVal);
+ }
+
+ // Canonicalize to use ordered comparisons by swapping the select
+ // operands.
+ //
+ // e.g.
+ // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X
+ if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
+ FCmpInst::Predicate InvPred = FCI->getInversePredicate();
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
+ Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
+ FCI->getName() + ".inv");
+
+ return SelectInst::Create(NewCond, FalseVal, TrueVal,
+ SI.getName() + ".p");
+ }
+
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return replaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return replaceInstUsesWith(SI, TrueVal);
+ }
+
+ // Canonicalize to use ordered comparisons by swapping the select
+ // operands.
+ //
+ // e.g.
+ // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y
+ if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
+ FCmpInst::Predicate InvPred = FCI->getInversePredicate();
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
+ Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
+ FCI->getName() + ".inv");
+
+ return SelectInst::Create(NewCond, FalseVal, TrueVal,
+ SI.getName() + ".p");
+ }
+
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *Add = foldAddSubSelect(SI, *Builder))
+ return Add;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ auto *TI = dyn_cast<Instruction>(TrueVal);
+ auto *FI = dyn_cast<Instruction>(FalseVal);
+ if (TI && FI && TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // See if we can fold the select into one of our operands.
+ if (SelType->isIntOrIntVectorTy() || SelType->isFPOrFPVectorTy()) {
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ return FoldI;
+
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ Instruction::CastOps CastOp;
+ SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+ auto SPF = SPR.Flavor;
+
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ // Canonicalize so that type casts are outside select patterns.
+ if (LHS->getType()->getPrimitiveSizeInBits() !=
+ SelType->getPrimitiveSizeInBits()) {
+ CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered);
+
+ Value *Cmp;
+ if (CmpInst::isIntPredicate(Pred)) {
+ Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ } else {
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
+ Builder->setFastMathFlags(FMF);
+ Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
+ }
+
+ Value *NewSI = Builder->CreateCast(CastOp,
+ Builder->CreateSelect(Cmp, LHS, RHS),
+ SelType);
+ return replaceInstUsesWith(SI, NewSI);
+ }
+ }
+
+ if (SPF) {
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ // ABS(ABS(a)) -> ABS(a)
+ // NABS(NABS(a)) -> NABS(a)
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // MAX(~a, ~b) -> ~MIN(a, b)
+ if (SPF == SPF_SMAX || SPF == SPF_UMAX) {
+ if (IsFreeToInvert(LHS, LHS->hasNUses(2)) &&
+ IsFreeToInvert(RHS, RHS->hasNUses(2))) {
+
+ // This transform adds a xor operation and that extra cost needs to be
+ // justified. We look for simplifications that will result from
+ // applying this rule:
+
+ bool Profitable =
+ (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) ||
+ (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) ||
+ (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
+
+ if (Profitable) {
+ Value *NewLHS = Builder->CreateNot(LHS);
+ Value *NewRHS = Builder->CreateNot(RHS);
+ Value *NewCmp = SPF == SPF_SMAX
+ ? Builder->CreateICmpSLT(NewLHS, NewRHS)
+ : Builder->CreateICmpULT(NewLHS, NewRHS);
+ Value *NewSI =
+ Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+ return replaceInstUsesWith(SI, NewSI);
+ }
+ }
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ }
+
+ // See if we can fold the select into a phi node if the condition is a select.
+ if (isa<PHINode>(SI.getCondition()))
+ // The true/false values have to be live in the PHI predecessor's blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
+ if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+ if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, select(C, a, b), c) -> select(C, a, c)
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return nullptr;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+ // We choose this as normal form to enable folding on the And and shortening
+ // paths for the values (this helps GetUnderlyingObjects() for example).
+ if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+ Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ SI.setOperand(0, And);
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ }
+ }
+ if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+ if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, a, select(C, b, c)) -> select(C, a, c)
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return nullptr;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+ if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+ Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ SI.setOperand(0, Or);
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ }
+ }
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ if (VectorType* VecTy = dyn_cast<VectorType>(SelType)) {
+ unsigned VWidth = VecTy->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) {
+ if (V != &SI)
+ return replaceInstUsesWith(SI, V);
+ return &SI;
+ }
+
+ if (isa<ConstantAggregateZero>(CondVal)) {
+ return replaceInstUsesWith(SI, FalseVal);
+ }
+ }
+
+ // See if we can determine the result of this select based on a dominating
+ // condition.
+ BasicBlock *Parent = SI.getParent();
+ if (BasicBlock *Dom = Parent->getSinglePredecessor()) {
+ auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
+ if (PBI && PBI->isConditional() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
+ (PBI->getSuccessor(0) == Parent || PBI->getSuccessor(1) == Parent)) {
+ bool CondIsFalse = PBI->getSuccessor(1) == Parent;
+ Optional<bool> Implication = isImpliedCondition(
+ PBI->getCondition(), SI.getCondition(), DL, CondIsFalse);
+ if (Implication) {
+ Value *V = *Implication ? TrueVal : FalseVal;
+ return replaceInstUsesWith(SI, V);
+ }
+ }
+ }
+
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
new file mode 100644
index 000000000000..08e16a7ee1af
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -0,0 +1,831 @@
+//===- InstCombineShifts.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitShl, visitLShr, and visitAShr functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // See if we can fold away this shift.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (Constant *CUI = dyn_cast<Constant>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+
+ // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
+ // Because shifts by negative values (which could occur if A were negative)
+ // are undefined.
+ Value *A; const APInt *B;
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+ // demand the sign bit (and many others) here??
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+ Op1->getName());
+ I.setOperand(1, Rem);
+ return &I;
+ }
+
+ return nullptr;
+}
+
+/// Return true if we can simplify two logical (either left or right) shifts
+/// that have constant shift amounts.
+static bool canEvaluateShiftedShift(unsigned FirstShiftAmt,
+ bool IsFirstShiftLeft,
+ Instruction *SecondShift, InstCombiner &IC,
+ Instruction *CxtI) {
+ assert(SecondShift->isLogicalShift() && "Unexpected instruction type");
+
+ // We need constant shifts.
+ auto *SecondShiftConst = dyn_cast<ConstantInt>(SecondShift->getOperand(1));
+ if (!SecondShiftConst)
+ return false;
+
+ unsigned SecondShiftAmt = SecondShiftConst->getZExtValue();
+ bool IsSecondShiftLeft = SecondShift->getOpcode() == Instruction::Shl;
+
+ // We can always fold shl(c1) + shl(c2) -> shl(c1+c2).
+ // We can always fold lshr(c1) + lshr(c2) -> lshr(c1+c2).
+ if (IsFirstShiftLeft == IsSecondShiftLeft)
+ return true;
+
+ // We can always fold lshr(c) + shl(c) -> and(c2).
+ // We can always fold shl(c) + lshr(c) -> and(c2).
+ if (FirstShiftAmt == SecondShiftAmt)
+ return true;
+
+ unsigned TypeWidth = SecondShift->getType()->getScalarSizeInBits();
+
+ // If the 2nd shift is bigger than the 1st, we can fold:
+ // lshr(c1) + shl(c2) -> shl(c3) + and(c4) or
+ // shl(c1) + lshr(c2) -> lshr(c3) + and(c4),
+ // but it isn't profitable unless we know the and'd out bits are already zero.
+ // Also check that the 2nd shift is valid (less than the type width) or we'll
+ // crash trying to produce the bit mask for the 'and'.
+ if (SecondShiftAmt > FirstShiftAmt && SecondShiftAmt < TypeWidth) {
+ unsigned MaskShift = IsSecondShiftLeft ? TypeWidth - SecondShiftAmt
+ : SecondShiftAmt - FirstShiftAmt;
+ APInt Mask = APInt::getLowBitsSet(TypeWidth, FirstShiftAmt) << MaskShift;
+ if (IC.MaskedValueIsZero(SecondShift->getOperand(0), Mask, 0, CxtI))
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
+ InstCombiner &IC, Instruction *CxtI) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI = nullptr;
+ if ((IsLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!IsLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I);
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ return canEvaluateShiftedShift(NumBits, IsLeftShift, I, IC, CxtI);
+
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+ return CanEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) &&
+ CanEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// When CanEvaluateShifted returned true for an expression,
+/// this value inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC, const DataLayout &DL) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ I->setOperand(
+ 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ return I;
+
+ case Instruction::Shl: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(BO->getOperand(0),
+ ConstantInt::get(BO->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(BO);
+ VI->takeName(BO);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
+ return BO;
+ }
+ // FIXME: This is almost identical to the SHL case. Refactor both cases into
+ // a helper function.
+ case Instruction::LShr: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(BO->getType());
+
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setIsExact(false);
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(BO->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setIsExact(false);
+ return BO;
+ }
+
+ case Instruction::Select:
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ isLeftShift, IC, DL));
+ return PN;
+ }
+ }
+}
+
+
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+ ConstantInt *COp1 = nullptr;
+ if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1))
+ COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
+ else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
+ COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
+ else
+ COp1 = dyn_cast<ConstantInt>(Op1);
+
+ if (!COp1)
+ return nullptr;
+
+ // See if we can propagate this shift into the input, this covers the trivial
+ // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this, &I)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return replaceInstUsesWith(
+ I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
+ }
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+ assert(!COp1->uge(TypeBits) &&
+ "Shift over the type width should have been removed already");
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= COp1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(COp1->getZExtValue());
+ }
+
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+
+ APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
+ Constant *Mask = ConstantInt::get(I.getContext(), Bits);
+ if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
+ Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
+ return BinaryOperator::CreateAnd(X, Mask);
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+ m_ConstantInt(CC)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+
+ APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
+ Constant *Mask = ConstantInt::get(I.getContext(), Bits);
+ if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
+ Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
+ return BinaryOperator::CreateAnd(X, Mask);
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
+ m_ConstantInt(CC))) && V2 == Op1) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+ // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = nullptr;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+
+ // This is a constant shift of a constant shift. Be careful about hiding
+ // shl instructions behind bit masks. They are used to represent multiplies
+ // by a constant, and it is important that simple arithmetic expressions
+ // are still recognizable by scalar evolution.
+ //
+ // The transforms applied to shl are very similar to the transforms applied
+ // to mul by constant. We can be more aggressive about optimizing right
+ // shifts.
+ //
+ // Combinations of right and left shifts will still be optimized in
+ // DAGCombine where scalar evolution no longer applies.
+
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+ // If this is oversized composite shift, then unsigned shifts get 0, ashr
+ // saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>?,exact C1) << C2 --> X << (C2-C1)
+ // The inexact version is deferred to DAGCombine so we don't hide shl
+ // behind a bit mask.
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl &&
+ ShiftOp->isExact()) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return NewShl;
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
+ X, ShiftDiffCst);
+ NewLShr->setIsExact(I.isExact());
+ return NewLShr;
+ }
+ Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
+ X, ShiftDiffCst);
+ NewAShr->setIsExact(I.isExact());
+ return NewAShr;
+ }
+ }
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+ // The inexact version is deferred to DAGCombine so we don't hide shl
+ // behind a bit mask.
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl &&
+ ShiftOp->isExact()) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
+ X, ShiftDiffCst);
+ NewShr->setIsExact(true);
+ return NewShr;
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(true);
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoSignedWrap(true);
+ return NewShl;
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V =
+ SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *V = commonShiftTransforms(I))
+ return V;
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the shifted-out value is known-zero, then this is a NUW shift.
+ if (!I.hasNoUnsignedWrap() &&
+ MaskedValueIsZero(I.getOperand(0),
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt), 0,
+ &I)) {
+ I.setHasNoUnsignedWrap();
+ return &I;
+ }
+
+ // If the shifted out value is all signbits, this is a NSW shift.
+ if (!I.hasNoSignedWrap() &&
+ ComputeNumSignBits(I.getOperand(0), 0, &I) > ShAmt) {
+ I.setHasNoSignedWrap();
+ return &I;
+ }
+ }
+
+ // (C1 << A) << C2 -> (C1 << C2) << A
+ Constant *C1, *C2;
+ Value *A;
+ if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
+ match(I.getOperand(1), m_Constant(C2)))
+ return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
+ DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ // ctlz.i32(x)>>5 --> zext(x == 0)
+ // cttz.i32(x)>>5 --> zext(x == 0)
+ // ctpop.i32(x)>>5 --> zext(x == -1)
+ if ((II->getIntrinsicID() == Intrinsic::ctlz ||
+ II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctpop) &&
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
+ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
+ Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ return new ZExtInst(Cmp, II->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(Op1C->getBitWidth(), ShAmt),
+ 0, &I)){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return replaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
+ DL, TLI, DT, AC))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
+ // have a sign-extend idiom.
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
+ // If the input is an extension from the shifted amount value, e.g.
+ // %x = zext i8 %A to i32
+ // %y = shl i32 %x, 24
+ // %z = ashr %y, 24
+ // then turn this into "z = sext i8 A to i32".
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) {
+ uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits();
+ uint32_t DestBits = ZI->getType()->getScalarSizeInBits();
+ if (Op1C->getZExtValue() == DestBits-SrcBits)
+ return new SExtInst(ZI->getOperand(0), ZI->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(Op1C->getBitWidth(), ShAmt),
+ 0, &I)) {
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits()),
+ 0, &I))
+ return BinaryOperator::CreateLShr(Op0, Op1);
+
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
new file mode 100644
index 000000000000..f3268d2c3471
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -0,0 +1,1333 @@
+//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains logic for simplifying instructions based on information
+// about how they are used.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+/// Check to see if the specified operand of the specified instruction is a
+/// constant integer. If so, check to see if there are any bits set in the
+/// constant that are not demanded. If so, shrink the constant and return true.
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+
+ return true;
+}
+
+
+
+/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
+/// the instruction has any properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ 0, &Inst);
+ if (!V) return false;
+ if (V == &Inst) return true;
+ replaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// This form of SimplifyDemandedBits simplifies the specified instruction
+/// operand if possible, updating it in place. It returns true if it made any
+/// change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, const APInt &DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ auto *UserI = dyn_cast<Instruction>(U.getUser());
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
+ KnownOne, Depth, UserI);
+ if (!NewVal) return false;
+ U = NewVal;
+ return true;
+}
+
+
+/// This function attempts to replace V with a simpler value based on the
+/// demanded bits. When this function is called, it is known that only the bits
+/// set in DemandedMask of the result of V are ever used downstream.
+/// Consequently, depending on the mask and V, it may be possible to replace V
+/// with a constant or one of its operands. In such cases, this function does
+/// the replacement and returns true. In all other cases, it returns false after
+/// analyzing the expression and setting KnownOne and known to be one in the
+/// expression. KnownZero contains all the bits that are known to be zero in the
+/// expression. These are provided to potentially allow the caller (which might
+/// recursively be SimplifyDemandedBits itself) to simplify the expression.
+/// KnownOne and KnownZero always follow the invariant that:
+/// KnownOne & KnownZero == 0.
+/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and
+/// KnownZero may only be accurate for those bits set in DemandedMask. Note also
+/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the
+/// same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth,
+ Instruction *CxtI) {
+ assert(V != nullptr && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ Type *VTy = V->getType();
+ assert(
+ (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return nullptr;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clearAllBits();
+ KnownZero = DemandedMask;
+ return nullptr;
+ }
+
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return nullptr;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return nullptr;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ return nullptr; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in all User's
+ // context, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ } else if (I->getOpcode() == Instruction::Xor) {
+ // We can simplify (X^Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+ return nullptr;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth + 1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
+ (RHSKnownOne & LHSKnownOne))) == DemandedMask)
+ return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero = RHSKnownZero | LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth + 1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
+ (RHSKnownOne | LHSKnownOne))) == DemandedMask)
+ return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne = RHSKnownOne | LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
+ (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
+ (RHSKnownOne & LHSKnownZero);
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, IKnownOne);
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // all known
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ return InsertNewInstWith(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ InsertNewInstWith(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
+ return InsertNewInstWith(NewXor, *I);
+ }
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
+ break;
+ }
+ case Instruction::Select:
+ // If this is a select as part of a min/max pattern, don't simplify any
+ // further in case we break the structure.
+ Value *LHS, *RHS;
+ if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
+ return nullptr;
+
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
+ return nullptr; // vector->int or fp->int?
+
+ if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return nullptr;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return nullptr;
+ } else if (I->getOperand(0)->getType()->isVectorTy())
+ // Don't touch a vector-to-scalar bitcast.
+ return nullptr;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.setBit(SrcBitWidth-1);
+
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded
+ // convert this into a zero extension.
+ if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstWith(NewCast, *I);
+ } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ KnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ /// If the high-bits of an ADD/SUB are not demanded, then we do not care
+ /// about the high bits of the operands.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ if (NLZ > 0) {
+ // Right fill the mask of bits for this ADD/SUB to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
+ ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth + 1)) {
+ // Disable the nsw and nuw flags here: We can no longer guarantee that
+ // we won't wrap after simplification. Removing the nsw/nuw flags is
+ // legal here because the top bit is not demanded.
+ BinaryOperator &BinOP = *cast<BinaryOperator>(I);
+ BinOP.setHasNoSignedWrap(false);
+ BinOP.setHasNoUnsignedWrap(false);
+ return I;
+ }
+ }
+
+ // Otherwise just hand the add/sub off to computeKnownBits to fill in
+ // the known zeros and ones.
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ break;
+ }
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ {
+ Value *VarX; ConstantInt *C1;
+ if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
+ Instruction *Shr = cast<Instruction>(I->getOperand(0));
+ Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
+ KnownZero, KnownOne);
+ if (R)
+ return R;
+ }
+ }
+
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // For a logical shift right
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Unsigned shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
+ SA, I->getName());
+ NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
+ return InsertNewInstWith(NewVal, *I);
+ } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X % -1 demands all the bits because we don't want to introduce
+ // INT_MIN % -1 (== undef) by accident.
+ if (Rem->isAllOnesValue())
+ break;
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
+ return I;
+
+ // The low bits of LHS are unchanged by the srem.
+ KnownZero = LHSKnownZero & LowBits;
+ KnownOne = LHSKnownOne & LowBits;
+
+ // If LHS is non-negative or has all low bits zero, then the upper bits
+ // are all zero.
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If LHS is negative and not all low bits are zero, then the upper bits
+ // are all one.
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero.setBit(KnownZero.getBitWidth() - 1);
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1))
+ return I;
+
+ unsigned Leaders = KnownZero2.countLeadingOnes();
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ // MOVMSK copies the vector elements' sign bits to the low bits
+ // and zeros the high bits.
+ unsigned ArgWidth;
+ if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
+ ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
+ } else {
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ ArgWidth = ArgType->getNumElements();
+ }
+
+ // If we don't need any of low bits then return zero,
+ // we know that DemandedMask is non-zero already.
+ APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
+ if (DemandedElts == 0)
+ return ConstantInt::getNullValue(VTy);
+
+ // We know that the upper bits are set to zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - ArgWidth);
+ return nullptr;
+ }
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ return nullptr;
+ }
+ }
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, KnownOne);
+ return nullptr;
+}
+
+/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
+/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
+/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
+/// of "C2-C1".
+///
+/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
+/// ..., bn}, without considering the specific value X is holding.
+/// This transformation is legal iff one of following conditions is hold:
+/// 1) All the bit in S are 0, in this case E1 == E2.
+/// 2) We don't care those bits in S, per the input DemandedMask.
+/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
+/// rest bits.
+///
+/// Currently we only test condition 2).
+///
+/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
+/// not successful.
+Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
+ Instruction *Shl,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne) {
+
+ const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
+ const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
+ if (!ShlOp1 || !ShrOp1)
+ return nullptr; // Noop.
+
+ Value *VarX = Shr->getOperand(0);
+ Type *Ty = VarX->getType();
+ unsigned BitWidth = Ty->getIntegerBitWidth();
+ if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
+ return nullptr; // Undef.
+
+ unsigned ShlAmt = ShlOp1.getZExtValue();
+ unsigned ShrAmt = ShrOp1.getZExtValue();
+
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
+ KnownZero &= DemandedMask;
+
+ APInt BitMask1(APInt::getAllOnesValue(BitWidth));
+ APInt BitMask2(APInt::getAllOnesValue(BitWidth));
+
+ bool isLshr = (Shr->getOpcode() == Instruction::LShr);
+ BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
+ (BitMask1.ashr(ShrAmt) << ShlAmt);
+
+ if (ShrAmt <= ShlAmt) {
+ BitMask2 <<= (ShlAmt - ShrAmt);
+ } else {
+ BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
+ BitMask2.ashr(ShrAmt - ShlAmt);
+ }
+
+ // Check if condition-2 (see the comment to this function) is satified.
+ if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
+ if (ShrAmt == ShlAmt)
+ return VarX;
+
+ if (!Shr->hasOneUse())
+ return nullptr;
+
+ BinaryOperator *New;
+ if (ShrAmt < ShlAmt) {
+ Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
+ New = BinaryOperator::CreateShl(VarX, Amt);
+ BinaryOperator *Orig = cast<BinaryOperator>(Shl);
+ New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
+ } else {
+ Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
+ New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
+ BinaryOperator::CreateAShr(VarX, Amt);
+ if (cast<BinaryOperator>(Shr)->isExact())
+ New->setIsExact(true);
+ }
+
+ return InsertNewInstWith(New, *Shl);
+ }
+
+ return nullptr;
+}
+
+/// The specified value produces a vector with any number of elements.
+/// DemandedElts contains the set of elements that are actually used by the
+/// caller. This method analyzes which elements of the operand are undef and
+/// returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified, then the resultant value is
+/// returned. This returns null if no change was made.
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = V->getType()->getVectorNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return nullptr;
+ }
+
+ if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+
+ // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return nullptr;
+
+ Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ continue;
+ }
+
+ Constant *Elt = C->getAggregateElement(i);
+ if (!Elt) return nullptr;
+
+ if (isa<UndefValue>(Elt)) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(Elt);
+ }
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCV = ConstantVector::get(Elts);
+ return NewCV != C ? NewCV : nullptr;
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return nullptr;
+
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return nullptr;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return nullptr; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+ // If this is a variable index, we don't know which element it overwrites.
+ // demand exactly the same input as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (!Idx) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth + 1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clearBit(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.setBit(MaskVal);
+ else
+ RightDemanded.setBit(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth + 1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth + 1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.setBit(i);
+ } else if (!DemandedElts[i]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ else
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::Select: {
+ APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
+ if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ for (unsigned i = 0; i < VWidth; i++) {
+ Constant *CElt = CV->getAggregateElement(i);
+ // Method isNullValue always returns false when called on a
+ // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
+ // to avoid propagating incorrect information.
+ if (isa<ConstantExpr>(CElt))
+ continue;
+ if (CElt->isNullValue())
+ LeftDemanded.clearBit(i);
+ else
+ RightDemanded.clearBit(i);
+ }
+ }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+ Depth + 1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
+ UndefElts2, Depth + 1);
+ if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ UndefElts2 = APInt(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if ((VWidth % InVWidth) == 0) {
+ // If the number of elements in the output is a multiple of the number of
+ // elements in the input then an input element is live if any of the
+ // corresponding output elements are live.
+ Ratio = VWidth / InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.setBit(OutIdx / Ratio);
+ } else if ((InVWidth % VWidth) == 0) {
+ // If the number of elements in the input is a multiple of the number of
+ // elements in the output then an input element is live if the
+ // corresponding output element is live.
+ Ratio = InVWidth / VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx / Ratio])
+ InputDemandedElts.setBit(InIdx);
+ } else {
+ // Unsupported so far.
+ break;
+ }
+
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ if (VWidth == InVWidth) {
+ UndefElts = UndefElts2;
+ } else if ((VWidth % InVWidth) == 0) {
+ // If the number of elements in the output is a multiple of the number of
+ // elements in the input then an output element is undef if the
+ // corresponding input element is undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx / Ratio])
+ UndefElts.setBit(OutIdx);
+ } else if ((InVWidth % VWidth) == 0) {
+ // If the number of elements in the input is a multiple of the number of
+ // elements in the output then an output element is undef if all of the
+ // corresponding input elements are undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
+ if (SubUndef.countPopulation() == Ratio)
+ UndefElts.setBit(OutIdx);
+ }
+ } else {
+ llvm_unreachable("Unimp");
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Unary scalar-as-vector operations that work column-wise.
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ case Intrinsic::x86_sse_sqrt_ss:
+ case Intrinsic::x86_sse2_sqrt_sd:
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+ // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
+ // checks).
+ break;
+
+ // Binary scalar-as-vector operations that work column-wise. A dest element
+ // is a function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_add_ss:
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse2_add_sd:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_div_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_add_ss:
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_sse2_add_sd:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_div_sd:
+ // TODO: Lower MIN/MAX/etc.
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
+ // Extract the element as scalars.
+ LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+ RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_add_ss:
+ case Intrinsic::x86_sse2_add_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_sse2_div_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFDiv(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+ II->getName());
+ InsertNewInstWith(New, *II);
+ return New;
+ }
+ }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+
+ // SSE4A instructions leave the upper 64-bits of the 128-bit result
+ // in an undefined state.
+ case Intrinsic::x86_sse4a_extrq:
+ case Intrinsic::x86_sse4a_extrqi:
+ case Intrinsic::x86_sse4a_insertq:
+ case Intrinsic::x86_sse4a_insertqi:
+ UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 000000000000..a76138756148
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,1274 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombineInternal.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+/// Return true if the value is cheaper to scalarize than it is to leave as a
+/// vector operation. isConstant indicates whether we're extracting one known
+/// element. If false we're extracting a variable index.
+static bool cheapToScalarize(Value *V, bool isConstant) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isConstant) return true;
+
+ // If all elts are the same, we can extract it and use any of the values.
+ if (Constant *Op0 = C->getAggregateElement(0U)) {
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
+ ++i)
+ if (C->getAggregateElement(i) != Op0)
+ return false;
+ return true;
+ }
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Insert element gets simplified to the inserted element or is deleted if
+ // this is constant idx extract element and its a constant idx insertelt.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (cheapToScalarize(BO->getOperand(0), isConstant) ||
+ cheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (cheapToScalarize(CI->getOperand(0), isConstant) ||
+ cheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+// If we have a PHI node with a vector type that is only used to feed
+// itself and be an operand of extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+ SmallVector<Instruction *, 2> Extracts;
+ // The users we want the PHI to have are:
+ // 1) The EI ExtractElement (we already know this)
+ // 2) Possibly more ExtractElements with the same index.
+ // 3) Another operand, which will feed back into the PHI.
+ Instruction *PHIUser = nullptr;
+ for (auto U : PN->users()) {
+ if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
+ if (EI.getIndexOperand() == EU->getIndexOperand())
+ Extracts.push_back(EU);
+ else
+ return nullptr;
+ } else if (!PHIUser) {
+ PHIUser = cast<Instruction>(U);
+ } else {
+ return nullptr;
+ }
+ }
+
+ if (!PHIUser)
+ return nullptr;
+
+ // Verify that this PHI user has one use, which is the PHI itself,
+ // and that it is a binary operation which is cheap to scalarize.
+ // otherwise return NULL.
+ if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
+ !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
+ return nullptr;
+
+ // Create a scalar PHI node that will replace the vector PHI node
+ // just before the current PHI node.
+ PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
+ PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
+ // Scalarize each PHI operand.
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ Value *PHIInVal = PN->getIncomingValue(i);
+ BasicBlock *inBB = PN->getIncomingBlock(i);
+ Value *Elt = EI.getIndexOperand();
+ // If the operand is the PHI induction variable:
+ if (PHIInVal == PHIUser) {
+ // Scalarize the binary operation. Its first operand is the
+ // scalar PHI, and the second operand is extracted from the other
+ // vector operand.
+ BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+ unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
+ Value *Op = InsertNewInstWith(
+ ExtractElementInst::Create(B0->getOperand(opId), Elt,
+ B0->getOperand(opId)->getName() + ".Elt"),
+ *B0);
+ Value *newPHIUser = InsertNewInstWith(
+ BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
+ scalarPHI, Op, B0), *B0);
+ scalarPHI->addIncoming(newPHIUser, inBB);
+ } else {
+ // Scalarize PHI input:
+ Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
+ // Insert the new instruction into the predecessor basic block.
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+ BasicBlock::iterator InsertPos;
+ if (pos && !isa<PHINode>(pos)) {
+ InsertPos = ++pos->getIterator();
+ } else {
+ InsertPos = inBB->getFirstInsertionPt();
+ }
+
+ InsertNewInstWith(newEI, *InsertPos);
+
+ scalarPHI->addIncoming(newEI, inBB);
+ }
+ }
+
+ for (auto E : Extracts)
+ replaceInstUsesWith(*E, scalarPHI);
+
+ return &EI;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ if (Value *V = SimplifyExtractElementInst(
+ EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(EI, V);
+
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. We handle a known element # below.
+ if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
+ if (cheapToScalarize(C, false))
+ return replaceInstUsesWith(EI, C->getAggregateElement(0U));
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+ // InstSimplify handles cases where the index is invalid.
+ assert(IndexVal < VectorWidth);
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 0);
+ DemandedMask.setBit(IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ // If this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+
+ // If there's a vector PHI feeding a scalar use through this extractelement
+ // instruction, try to scalarize the PHI.
+ if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+ Instruction *scalarPHI = scalarizePHI(EI, PN);
+ if (scalarPHI)
+ return scalarPHI;
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
+ newEI0, newEI1, BO);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return replaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
+ Value *Src;
+ unsigned LHSWidth =
+ SVI->getOperand(0)->getType()->getVectorNumElements();
+
+ if (SrcIdx < 0)
+ return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ if (SrcIdx < (int)LHSWidth)
+ Src = SVI->getOperand(0);
+ else {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ }
+ Type *Int32Ty = Type::getInt32Ty(EI.getContext());
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Int32Ty,
+ SrcIdx, false));
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Canonicalize extractelement(cast) -> cast(extractelement).
+ // Bitcasts can change the number of vector elements, and they cost
+ // nothing.
+ if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
+ Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+ EI.getIndexOperand());
+ Worklist.AddValue(EE);
+ return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+ }
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (SI->hasOneUse()) {
+ // TODO: For a select on vectors, it might be useful to do this if it
+ // has multiple extractelement uses. For vector select, that seems to
+ // fight the vectorizer.
+
+ // If we are extracting an element from a vector select or a select on
+ // vectors, create a select on the scalars extracted from the vector
+ // arguments.
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Cond = Builder->CreateExtractElement(Cond,
+ EI.getIndexOperand(),
+ Cond->getName() + ".elt");
+ }
+
+ Value *V1Elem
+ = Builder->CreateExtractElement(TrueVal,
+ EI.getIndexOperand(),
+ TrueVal->getName() + ".elt");
+
+ Value *V2Elem
+ = Builder->CreateExtractElement(FalseVal,
+ EI.getIndexOperand(),
+ FalseVal->getName() + ".elt");
+ return SelectInst::Create(Cond,
+ V1Elem,
+ V2Elem,
+ SI->getName() + ".elt");
+ }
+ }
+ }
+ return nullptr;
+}
+
+/// If V is a shuffle of values that ONLY returns elements from either LHS or
+/// RHS, return the shuffle mask and true. Otherwise, return false.
+static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ SmallVectorImpl<Constant*> &Mask) {
+ assert(LHS->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = V->getType()->getVectorNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return true;
+ }
+
+ if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return true;
+ }
+
+ if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ i+NumElts));
+ return true;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // We can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1))) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // We can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx + NumLHSElts);
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// If we have insertion into a vector that is wider than the vector that we
+/// are extracting from, try to widen the source vector to allow a single
+/// shufflevector to replace one or more insert/extract pairs.
+static void replaceExtractElements(InsertElementInst *InsElt,
+ ExtractElementInst *ExtElt,
+ InstCombiner &IC) {
+ VectorType *InsVecType = InsElt->getType();
+ VectorType *ExtVecType = ExtElt->getVectorOperandType();
+ unsigned NumInsElts = InsVecType->getVectorNumElements();
+ unsigned NumExtElts = ExtVecType->getVectorNumElements();
+
+ // The inserted-to vector must be wider than the extracted-from vector.
+ if (InsVecType->getElementType() != ExtVecType->getElementType() ||
+ NumExtElts >= NumInsElts)
+ return;
+
+ // Create a shuffle mask to widen the extended-from vector using undefined
+ // values. The mask selects all of the values of the original vector followed
+ // by as many undefined values as needed to create a vector of the same length
+ // as the inserted-to vector.
+ SmallVector<Constant *, 16> ExtendMask;
+ IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
+ for (unsigned i = 0; i < NumExtElts; ++i)
+ ExtendMask.push_back(ConstantInt::get(IntType, i));
+ for (unsigned i = NumExtElts; i < NumInsElts; ++i)
+ ExtendMask.push_back(UndefValue::get(IntType));
+
+ Value *ExtVecOp = ExtElt->getVectorOperand();
+ auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
+ BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
+ ? ExtVecOpInst->getParent()
+ : ExtElt->getParent();
+
+ // TODO: This restriction matches the basic block check below when creating
+ // new extractelement instructions. If that limitation is removed, this one
+ // could also be removed. But for now, we just bail out to ensure that we
+ // will replace the extractelement instruction that is feeding our
+ // insertelement instruction. This allows the insertelement to then be
+ // replaced by a shufflevector. If the insertelement is not replaced, we can
+ // induce infinite looping because there's an optimization for extractelement
+ // that will delete our widening shuffle. This would trigger another attempt
+ // here to create that shuffle, and we spin forever.
+ if (InsertionBlock != InsElt->getParent())
+ return;
+
+ auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
+ ConstantVector::get(ExtendMask));
+
+ // Insert the new shuffle after the vector operand of the extract is defined
+ // (as long as it's not a PHI) or at the start of the basic block of the
+ // extract, so any subsequent extracts in the same basic block can use it.
+ // TODO: Insert before the earliest ExtractElementInst that is replaced.
+ if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
+ WideVec->insertAfter(ExtVecOpInst);
+ else
+ IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
+
+ // Replace extracts from the original narrow vector with extracts from the new
+ // wide vector.
+ for (User *U : ExtVecOp->users()) {
+ ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
+ if (!OldExt || OldExt->getParent() != WideVec->getParent())
+ continue;
+ auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
+ NewExt->insertAfter(WideVec);
+ IC.replaceInstUsesWith(*OldExt, NewExt);
+ }
+}
+
+/// We are building a shuffle to create V, which is a sequence of insertelement,
+/// extractelement pairs. If PermittedRHS is set, then we must either use it or
+/// not rely on the second vector source. Return a std::pair containing the
+/// left and right vectors of the proposed shuffle (or 0), and set the Mask
+/// parameter as required.
+///
+/// Note: we intentionally don't try to fold earlier shuffles since they have
+/// often been chosen carefully to be efficiently implementable on the target.
+typedef std::pair<Value *, Value *> ShuffleOps;
+
+static ShuffleOps collectShuffleElements(Value *V,
+ SmallVectorImpl<Constant *> &Mask,
+ Value *PermittedRHS,
+ InstCombiner &IC) {
+ assert(V->getType()->isVectorTy() && "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return std::make_pair(
+ PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+ return std::make_pair(V, nullptr);
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
+ Value *RHS = EI->getOperand(0);
+ ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
+ assert(LR.second == nullptr || LR.second == RHS);
+
+ if (LR.first->getType() != RHS->getType()) {
+ // Although we are giving up for now, see if we can create extracts
+ // that match the inserts for another round of combining.
+ replaceExtractElements(IEI, EI, IC);
+
+ // We tried our best, but we can't find anything compatible with RHS
+ // further up the chain. Return a trivial shuffle.
+ for (unsigned i = 0; i < NumElts; ++i)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
+ return std::make_pair(V, nullptr);
+ }
+
+ unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumLHSElts+ExtractedIdx);
+ return std::make_pair(LR.first, RHS);
+ }
+
+ if (VecOp == PermittedRHS) {
+ // We've gone as far as we can: anything on the other side of the
+ // extractelement will already have been converted into a shuffle.
+ unsigned NumLHSElts =
+ EI->getOperand(0)->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(
+ Type::getInt32Ty(V->getContext()),
+ i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
+ return std::make_pair(EI->getOperand(0), PermittedRHS);
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
+ collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+ Mask))
+ return std::make_pair(EI->getOperand(0), PermittedRHS);
+ }
+ }
+ }
+
+ // Otherwise, we can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return std::make_pair(V, nullptr);
+}
+
+/// Try to find redundant insertvalue instructions, like the following ones:
+/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
+/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
+/// Here the second instruction inserts values at the same indices, as the
+/// first one, making the first one redundant.
+/// It should be transformed to:
+/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
+Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
+ bool IsRedundant = false;
+ ArrayRef<unsigned int> FirstIndices = I.getIndices();
+
+ // If there is a chain of insertvalue instructions (each of them except the
+ // last one has only one use and it's another insertvalue insn from this
+ // chain), check if any of the 'children' uses the same indices as the first
+ // instruction. In this case, the first one is redundant.
+ Value *V = &I;
+ unsigned Depth = 0;
+ while (V->hasOneUse() && Depth < 10) {
+ User *U = V->user_back();
+ auto UserInsInst = dyn_cast<InsertValueInst>(U);
+ if (!UserInsInst || U->getOperand(0) != V)
+ break;
+ if (UserInsInst->getIndices() == FirstIndices) {
+ IsRedundant = true;
+ break;
+ }
+ V = UserInsInst;
+ Depth++;
+ }
+
+ if (IsRedundant)
+ return replaceInstUsesWith(I, I.getOperand(0));
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef or into an undefined place, remove this.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ replaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
+ unsigned NumInsertVectorElts = IE.getType()->getNumElements();
+ unsigned NumExtractVectorElts =
+ EI->getOperand(0)->getType()->getVectorNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
+ return replaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
+ return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return replaceInstUsesWith(IE, VecOp);
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to), into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
+ SmallVector<Constant*, 16> Mask;
+ ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
+
+ // The proposed shuffle may be trivial, in which case we shouldn't
+ // perform the combine.
+ if (LR.first != &IE && LR.second != &IE) {
+ // We now have a shuffle of LHS, RHS, Mask.
+ if (LR.second == nullptr)
+ LR.second = UndefValue::get(LR.first->getType());
+ return new ShuffleVectorInst(LR.first, LR.second,
+ ConstantVector::get(Mask));
+ }
+ }
+ }
+ }
+
+ unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (V != &IE)
+ return replaceInstUsesWith(IE, V);
+ return &IE;
+ }
+
+ return nullptr;
+}
+
+/// Return true if we can evaluate the specified expression tree if the vector
+/// elements were shuffled in a different order.
+static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
+ unsigned Depth = 5) {
+ // We can always reorder the elements of a constant.
+ if (isa<Constant>(V))
+ return true;
+
+ // We won't reorder vector arguments. No IPO here.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Two users may expect different orders of the elements. Don't try it.
+ if (!I->hasOneUse())
+ return false;
+
+ if (Depth == 0) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::GetElementPtr: {
+ for (Value *Operand : I->operands()) {
+ if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
+ return false;
+ }
+ return true;
+ }
+ case Instruction::InsertElement: {
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (!CI) return false;
+ int ElementNumber = CI->getLimitedValue();
+
+ // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
+ // can't put an element into multiple indices.
+ bool SeenOnce = false;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == ElementNumber) {
+ if (SeenOnce)
+ return false;
+ SeenOnce = true;
+ }
+ }
+ return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
+ }
+ }
+ return false;
+}
+
+/// Rebuild a new instruction just like 'I' but with the new operands given.
+/// In the event of type mismatch, the type of the operands is correct.
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+ // We don't want to use the IRBuilder here because we want the replacement
+ // instructions to appear next to 'I', not the builder's insertion point.
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ assert(NewOps.size() == 2 && "binary operator with #ops != 2");
+ BinaryOperator *New =
+ BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
+ NewOps[0], NewOps[1], "", BO);
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
+ New->setHasNoSignedWrap(BO->hasNoSignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(BO)) {
+ New->setIsExact(BO->isExact());
+ }
+ if (isa<FPMathOperator>(BO))
+ New->copyFastMathFlags(I);
+ return New;
+ }
+ case Instruction::ICmp:
+ assert(NewOps.size() == 2 && "icmp with #ops != 2");
+ return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
+ NewOps[0], NewOps[1]);
+ case Instruction::FCmp:
+ assert(NewOps.size() == 2 && "fcmp with #ops != 2");
+ return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
+ NewOps[0], NewOps[1]);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt: {
+ // It's possible that the mask has a different number of elements from
+ // the original cast. We recompute the destination type to match the mask.
+ Type *DestTy =
+ VectorType::get(I->getType()->getScalarType(),
+ NewOps[0]->getType()->getVectorNumElements());
+ assert(NewOps.size() == 1 && "cast with #ops != 1");
+ return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
+ "", I);
+ }
+ case Instruction::GetElementPtr: {
+ Value *Ptr = NewOps[0];
+ ArrayRef<Value*> Idx = NewOps.slice(1);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
+ GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
+ return GEP;
+ }
+ }
+ llvm_unreachable("failed to rebuild vector instructions");
+}
+
+Value *
+InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+ // Mask.size() does not need to be equal to the number of vector elements.
+
+ assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
+ if (isa<UndefValue>(V)) {
+ return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
+ Mask.size()));
+ }
+ if (isa<ConstantAggregateZero>(V)) {
+ return ConstantAggregateZero::get(
+ VectorType::get(V->getType()->getScalarType(),
+ Mask.size()));
+ }
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ SmallVector<Constant *, 16> MaskValues;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == -1)
+ MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
+ else
+ MaskValues.push_back(Builder->getInt32(Mask[i]));
+ }
+ return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
+ ConstantVector::get(MaskValues));
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::Select:
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> NewOps;
+ bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
+ for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ NewOps.push_back(V);
+ NeedsRebuild |= (V != I->getOperand(i));
+ }
+ if (NeedsRebuild) {
+ return buildNew(I, NewOps);
+ }
+ return I;
+ }
+ case Instruction::InsertElement: {
+ int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
+
+ // The insertelement was inserting at Element. Figure out which element
+ // that becomes after shuffling. The answer is guaranteed to be unique
+ // by CanEvaluateShuffled.
+ bool Found = false;
+ int Index = 0;
+ for (int e = Mask.size(); Index != e; ++Index) {
+ if (Mask[Index] == Element) {
+ Found = true;
+ break;
+ }
+ }
+
+ // If element is not in Mask, no need to handle the operand 1 (element to
+ // be inserted). Just evaluate values in operand 0 according to Mask.
+ if (!Found)
+ return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+
+ Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ return InsertElementInst::Create(V, I->getOperand(1),
+ Builder->getInt32(Index), "", I);
+ }
+ }
+ llvm_unreachable("failed to reorder elements of vector instruction!");
+}
+
+static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+ bool &isLHSID, bool &isRHSID) {
+ isLHSID = isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+}
+
+// Returns true if the shuffle is extracting a contiguous range of values from
+// LHS, for example:
+// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
+// Shuffles to: |EE|FF|GG|HH|
+// +--+--+--+--+
+static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
+ SmallVector<int, 16> &Mask) {
+ unsigned LHSElems =
+ cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ unsigned EndIdx = Mask.back();
+ if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
+ return false;
+ for (unsigned I = 0; I != MaskElems; ++I)
+ if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
+ return false;
+ return true;
+}
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return replaceInstUsesWith(SVI, V);
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
+
+ // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
+ // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ Value *Result = (VWidth == LHSWidth)
+ ? LHS : UndefValue::get(SVI.getType());
+ return replaceInstUsesWith(SVI, Result);
+ }
+
+ // Remap any references to RHS to use LHS.
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
+ if (Mask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ continue;
+ }
+
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ if (VWidth == LHSWidth) {
+ // Analyze the shuffle, are the LHS or RHS and identity shuffles?
+ bool isLHSID, isRHSID;
+ recognizeIdentityMask(Mask, isLHSID, isRHSID);
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return replaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return replaceInstUsesWith(SVI, RHS);
+ }
+
+ if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
+ Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
+ return replaceInstUsesWith(SVI, V);
+ }
+
+ // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
+ // a non-vector type. We can instead bitcast the original vector followed by
+ // an extract of the desired element:
+ //
+ // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
+ // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // %1 = bitcast <4 x i8> %sroa to i32
+ // Becomes:
+ // %bc = bitcast <16 x i8> %in to <4 x i32>
+ // %ext = extractelement <4 x i32> %bc, i32 0
+ //
+ // If the shuffle is extracting a contiguous range of values from the input
+ // vector then each use which is a bitcast of the extracted size can be
+ // replaced. This will work if the vector types are compatible, and the begin
+ // index is aligned to a value in the casted vector type. If the begin index
+ // isn't aligned then we can shuffle the original vector (keeping the same
+ // vector type) before extracting.
+ //
+ // This code will bail out if the target type is fundamentally incompatible
+ // with vectors of the source type.
+ //
+ // Example of <16 x i8>, target type i32:
+ // Index range [4,8): v-----------v Will work.
+ // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ // <16 x i8>: | | | | | | | | | | | | | | | | |
+ // <4 x i32>: | | | | |
+ // +-----------+-----------+-----------+-----------+
+ // Index range [6,10): ^-----------^ Needs an extra shuffle.
+ // Target type i40: ^--------------^ Won't work, bail.
+ if (isShuffleExtractingFromLHS(SVI, Mask)) {
+ Value *V = LHS;
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ VectorType *SrcTy = cast<VectorType>(V->getType());
+ unsigned VecBitWidth = SrcTy->getBitWidth();
+ unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
+ assert(SrcElemBitWidth && "vector elements must have a bitwidth");
+ unsigned SrcNumElems = SrcTy->getNumElements();
+ SmallVector<BitCastInst *, 8> BCs;
+ DenseMap<Type *, Value *> NewBCs;
+ for (User *U : SVI.users())
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
+ if (!BC->use_empty())
+ // Only visit bitcasts that weren't previously handled.
+ BCs.push_back(BC);
+ for (BitCastInst *BC : BCs) {
+ Type *TgtTy = BC->getDestTy();
+ unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
+ if (!TgtElemBitWidth)
+ continue;
+ unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
+ bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
+ bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
+ if (!VecBitWidthsEqual)
+ continue;
+ if (!VectorType::isValidElementType(TgtTy))
+ continue;
+ VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
+ if (!BegIsAligned) {
+ // Shuffle the input so [0,NumElements) contains the output, and
+ // [NumElems,SrcNumElems) is undef.
+ SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
+ UndefValue::get(Int32Ty));
+ for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
+ ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
+ V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(ShuffleMask),
+ SVI.getName() + ".extract");
+ BegIdx = 0;
+ }
+ unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
+ assert(SrcElemsPerTgtElem);
+ BegIdx /= SrcElemsPerTgtElem;
+ bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+ auto *NewBC =
+ BCAlreadyExists
+ ? NewBCs[CastSrcTy]
+ : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+ if (!BCAlreadyExists)
+ NewBCs[CastSrcTy] = NewBC;
+ auto *Ext = Builder->CreateExtractElement(
+ NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+ // The shufflevector isn't being replaced: the bitcast that used it
+ // is. InstCombine will visit the newly-created instructions.
+ replaceInstUsesWith(*BC, Ext);
+ MadeChange = true;
+ }
+ }
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle.
+ // Cases that might be simplified:
+ // 1.
+ // x1=shuffle(v1,v2,mask1)
+ // x=shuffle(x1,undef,mask)
+ // ==>
+ // x=shuffle(v1,undef,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
+ // 2.
+ // x1=shuffle(v1,undef,mask1)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == mask1.size()
+ // ==>
+ // x=shuffle(v1,x2,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
+ // 3.
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v2.size() == mask2.size()
+ // ==>
+ // x=shuffle(x1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
+ // 4.
+ // x1=shuffle(v1,undef,mask1)
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == v2.size()
+ // ==>
+ // x=shuffle(v1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
+ //
+ // Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is either a splat or one of the
+ // input shuffle masks. In this case, merging the shuffles just removes
+ // one instruction, which we know is safe. This is good for things like
+ // turning: (splat(splat)) -> splat, or
+ // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
+ ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
+ if (LHSShuffle)
+ if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
+ LHSShuffle = nullptr;
+ if (RHSShuffle)
+ if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
+ RHSShuffle = nullptr;
+ if (!LHSShuffle && !RHSShuffle)
+ return MadeChange ? &SVI : nullptr;
+
+ Value* LHSOp0 = nullptr;
+ Value* LHSOp1 = nullptr;
+ Value* RHSOp0 = nullptr;
+ unsigned LHSOp0Width = 0;
+ unsigned RHSOp0Width = 0;
+ if (LHSShuffle) {
+ LHSOp0 = LHSShuffle->getOperand(0);
+ LHSOp1 = LHSShuffle->getOperand(1);
+ LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
+ }
+ if (RHSShuffle) {
+ RHSOp0 = RHSShuffle->getOperand(0);
+ RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
+ }
+ Value* newLHS = LHS;
+ Value* newRHS = RHS;
+ if (LHSShuffle) {
+ // case 1
+ if (isa<UndefValue>(RHS)) {
+ newLHS = LHSOp0;
+ newRHS = LHSOp1;
+ }
+ // case 2 or 4
+ else if (LHSOp0Width == LHSWidth) {
+ newLHS = LHSOp0;
+ }
+ }
+ // case 3 or 4
+ if (RHSShuffle && RHSOp0Width == LHSWidth) {
+ newRHS = RHSOp0;
+ }
+ // case 4
+ if (LHSOp0 == RHSOp0) {
+ newLHS = LHSOp0;
+ newRHS = nullptr;
+ }
+
+ if (newLHS == LHS && newRHS == RHS)
+ return MadeChange ? &SVI : nullptr;
+
+ SmallVector<int, 16> LHSMask;
+ SmallVector<int, 16> RHSMask;
+ if (newLHS != LHS)
+ LHSMask = LHSShuffle->getShuffleMask();
+ if (RHSShuffle && newRHS != RHS)
+ RHSMask = RHSShuffle->getShuffleMask();
+
+ unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
+ SmallVector<int, 16> newMask;
+ bool isSplat = true;
+ int SplatElt = -1;
+ // Create a new mask for the new ShuffleVectorInst so that the new
+ // ShuffleVectorInst is equivalent to the original one.
+ for (unsigned i = 0; i < VWidth; ++i) {
+ int eltMask;
+ if (Mask[i] < 0) {
+ // This element is an undef value.
+ eltMask = -1;
+ } else if (Mask[i] < (int)LHSWidth) {
+ // This element is from left hand side vector operand.
+ //
+ // If LHS is going to be replaced (case 1, 2, or 4), calculate the
+ // new mask value for the element.
+ if (newLHS != LHS) {
+ eltMask = LHSMask[Mask[i]];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
+ eltMask = -1;
+ } else
+ eltMask = Mask[i];
+ } else {
+ // This element is from right hand side vector operand
+ //
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value. (case 1)
+ if (isa<UndefValue>(RHS))
+ eltMask = -1;
+ // If RHS is going to be replaced (case 3 or 4), calculate the
+ // new mask value for the element.
+ else if (newRHS != RHS) {
+ eltMask = RHSMask[Mask[i]-LHSWidth];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)RHSOp0Width) {
+ assert(isa<UndefValue>(RHSShuffle->getOperand(1))
+ && "should have been check above");
+ eltMask = -1;
+ }
+ } else
+ eltMask = Mask[i]-LHSWidth;
+
+ // If LHS's width is changed, shift the mask value accordingly.
+ // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
+ // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
+ // If newRHS == newLHS, we want to remap any references from newRHS to
+ // newLHS so that we can properly identify splats that may occur due to
+ // obfuscation across the two vectors.
+ if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
+ eltMask += newLHSWidth;
+ }
+
+ // Check if this could still be a splat.
+ if (eltMask >= 0) {
+ if (SplatElt >= 0 && SplatElt != eltMask)
+ isSplat = false;
+ SplatElt = eltMask;
+ }
+
+ newMask.push_back(eltMask);
+ }
+
+ // If the result mask is equal to one of the original shuffle masks,
+ // or is a splat, do the replacement.
+ if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
+ if (newMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
+ }
+ }
+ if (!newRHS)
+ newRHS = UndefValue::get(newLHS->getType());
+ return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
+ }
+
+ // If the result mask is an identity, replace uses of this instruction with
+ // corresponding argument.
+ bool isLHSID, isRHSID;
+ recognizeIdentityMask(newMask, isLHSID, isRHSID);
+ if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
+ if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
+
+ return MadeChange ? &SVI : nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 000000000000..377ccb9c37f7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,3223 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <climits>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "instcombine"
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc , "Number of reassociations");
+
+static cl::opt<bool>
+EnableExpensiveCombines("expensive-combines",
+ cl::desc("Enable expensive instruction combines"));
+
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+ return llvm::EmitGEPOffset(Builder, DL, GEP);
+}
+
+/// Return true if it is desirable to convert an integer computation from a
+/// given bit width to a new bit width.
+/// We don't want to convert from a legal to an illegal type for example or from
+/// a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(unsigned FromWidth,
+ unsigned ToWidth) const {
+ bool FromLegal = DL.isLegalInteger(FromWidth);
+ bool ToLegal = DL.isLegalInteger(ToWidth);
+
+ // If this is a legal integer from type, and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
+/// Return true if it is desirable to convert a computation from 'From' to 'To'.
+/// We don't want to convert from a legal to an illegal type for example or from
+/// a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ return ShouldChangeType(FromWidth, ToWidth);
+}
+
+// Return true, if No Signed Wrap should be maintained for I.
+// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
+// where both B and C should be ConstantInts, results in a constant that does
+// not overflow. This function only handles the Add and Sub opcodes. For
+// all other opcodes, the function conservatively returns false.
+static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
+ OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ if (!OBO || !OBO->hasNoSignedWrap())
+ return false;
+
+ // We reason about Add and Sub Only.
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ return false;
+
+ const APInt *BVal, *CVal;
+ if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
+ return false;
+
+ bool Overflow = false;
+ if (Opcode == Instruction::Add)
+ BVal->sadd_ov(*CVal, Overflow);
+ else
+ BVal->ssub_ov(*CVal, Overflow);
+
+ return !Overflow;
+}
+
+/// Conservatively clears subclassOptionalData after a reassociation or
+/// commutation. We preserve fast-math flags when applicable as they can be
+/// preserved.
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
+ FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
+ if (!FPMO) {
+ I.clearSubclassOptionalData();
+ return;
+ }
+
+ FastMathFlags FMF = I.getFastMathFlags();
+ I.clearSubclassOptionalData();
+ I.setFastMathFlags(FMF);
+}
+
+/// Combine constant operands of associative operations either before or after a
+/// cast to eliminate one of the associative operations:
+/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
+/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
+static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) {
+ auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
+ if (!Cast || !Cast->hasOneUse())
+ return false;
+
+ // TODO: Enhance logic for other casts and remove this check.
+ auto CastOpcode = Cast->getOpcode();
+ if (CastOpcode != Instruction::ZExt)
+ return false;
+
+ // TODO: Enhance logic for other BinOps and remove this check.
+ auto AssocOpcode = BinOp1->getOpcode();
+ if (AssocOpcode != Instruction::Xor && AssocOpcode != Instruction::And &&
+ AssocOpcode != Instruction::Or)
+ return false;
+
+ auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
+ if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
+ return false;
+
+ Constant *C1, *C2;
+ if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
+ !match(BinOp2->getOperand(1), m_Constant(C2)))
+ return false;
+
+ // TODO: This assumes a zext cast.
+ // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
+ // to the destination type might lose bits.
+
+ // Fold the constants together in the destination type:
+ // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
+ Type *DestTy = C1->getType();
+ Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
+ Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
+ Cast->setOperand(0, BinOp2->getOperand(0));
+ BinOp1->setOperand(1, FoldedC);
+ return true;
+}
+
+/// This performs a few simplifications for operators that are associative or
+/// commutative:
+///
+/// Commutative operators:
+///
+/// 1. Order operands such that they are listed from right (least complex) to
+/// left (most complex). This puts constants before unary operators before
+/// binary operators.
+///
+/// Associative operators:
+///
+/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+///
+/// Associative and commutative operators:
+///
+/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+/// if C1 and C2 are constants.
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ bool Changed = false;
+
+ do {
+ // Order operands such that they are listed from right (least complex) to
+ // left (most complex). This puts constants before unary operators before
+ // binary operators.
+ if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+ getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+ if (I.isAssociative()) {
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) {
+ // It simplifies to V. Form "A op V".
+ I.setOperand(0, A);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ if (MaintainNoSignedWrap(I, B, C) &&
+ (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
+ // Note: this is only valid because SimplifyBinOp doesn't look at
+ // the operands to Op0.
+ I.clearSubclassOptionalData();
+ I.setHasNoSignedWrap(true);
+ } else {
+ ClearSubclassDataAfterReassociation(I);
+ }
+
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) {
+ // It simplifies to V. Form "V op C".
+ I.setOperand(0, V);
+ I.setOperand(1, C);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+ }
+
+ if (I.isAssociative() && I.isCommutative()) {
+ if (simplifyAssocCastAssoc(&I)) {
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
+ // It simplifies to V. Form "V op B".
+ I.setOperand(0, V);
+ I.setOperand(1, B);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
+ // It simplifies to V. Form "B op V".
+ I.setOperand(0, B);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+ // if C1 and C2 are constants.
+ if (Op0 && Op1 &&
+ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+ isa<Constant>(Op0->getOperand(1)) &&
+ isa<Constant>(Op1->getOperand(1)) &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *A = Op0->getOperand(0);
+ Constant *C1 = cast<Constant>(Op0->getOperand(1));
+ Value *B = Op1->getOperand(0);
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+ BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
+ if (isa<FPMathOperator>(New)) {
+ FastMathFlags Flags = I.getFastMathFlags();
+ Flags &= Op0->getFastMathFlags();
+ Flags &= Op1->getFastMathFlags();
+ New->setFastMathFlags(Flags);
+ }
+ InsertNewInstWith(New, I);
+ New->takeName(Op1);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+
+ Changed = true;
+ continue;
+ }
+ }
+
+ // No further simplifications.
+ return Changed;
+ } while (1);
+}
+
+/// Return whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ switch (LOp) {
+ default:
+ return false;
+
+ case Instruction::And:
+ // And distributes over Or and Xor.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+
+ case Instruction::Mul:
+ // Multiplication distributes over addition and subtraction.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ }
+
+ case Instruction::Or:
+ // Or distributes over And.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::And:
+ return true;
+ }
+ }
+}
+
+/// Return whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ if (Instruction::isCommutative(ROp))
+ return LeftDistributesOverRight(ROp, LOp);
+
+ switch (LOp) {
+ default:
+ return false;
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return true;
+ }
+ }
+ // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+ // but this requires knowing that the addition does not overflow and other
+ // such subtleties.
+ return false;
+}
+
+/// This function returns identity value for given opcode, which can be used to
+/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
+static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
+ if (isa<Constant>(V))
+ return nullptr;
+
+ if (OpCode == Instruction::Mul)
+ return ConstantInt::get(V->getType(), 1);
+
+ // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc.
+
+ return nullptr;
+}
+
+/// This function factors binary ops which can be combined using distributive
+/// laws. This function tries to transform 'Op' based TopLevelOpcode to enable
+/// factorization e.g for ADD(SHL(X , 2), MUL(X, 5)), When this function called
+/// with TopLevelOpcode == Instruction::Add and Op = SHL(X, 2), transforms
+/// SHL(X, 2) to MUL(X, 4) i.e. returns Instruction::Mul with LHS set to 'X' and
+/// RHS to 4.
+static Instruction::BinaryOps
+getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
+ BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+ if (!Op)
+ return Instruction::BinaryOpsEnd;
+
+ LHS = Op->getOperand(0);
+ RHS = Op->getOperand(1);
+
+ switch (TopLevelOpcode) {
+ default:
+ return Op->getOpcode();
+
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Op->getOpcode() == Instruction::Shl) {
+ if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
+ // The multiplier is really 1 << CST.
+ RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
+ return Instruction::Mul;
+ }
+ }
+ return Op->getOpcode();
+ }
+
+ // TODO: We can add other conversions e.g. shr => div etc.
+}
+
+/// This tries to simplify binary operations by factorizing out common terms
+/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
+static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
+ const DataLayout &DL, BinaryOperator &I,
+ Instruction::BinaryOps InnerOpcode, Value *A,
+ Value *B, Value *C, Value *D) {
+
+ // If any of A, B, C, D are null, we can not factor I, return early.
+ // Checking A and C should be enough.
+ if (!A || !C || !B || !D)
+ return nullptr;
+
+ Value *V = nullptr;
+ Value *SimplifiedInst = nullptr;
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
+ if (V) {
+ SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V);
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
+ if (V) {
+ SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B);
+ }
+ }
+
+ if (SimplifiedInst) {
+ ++NumFactor;
+ SimplifiedInst->takeName(&I);
+
+ // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
+ // TODO: Check for NUW.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
+ if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
+ bool HasNSW = false;
+ if (isa<OverflowingBinaryOperator>(&I))
+ HasNSW = I.hasNoSignedWrap();
+
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (isa<OverflowingBinaryOperator>(Op0))
+ HasNSW &= Op0->hasNoSignedWrap();
+
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (isa<OverflowingBinaryOperator>(Op1))
+ HasNSW &= Op1->hasNoSignedWrap();
+
+ // We can propagate 'nsw' if we know that
+ // %Y = mul nsw i16 %X, C
+ // %Z = add nsw i16 %Y, %X
+ // =>
+ // %Z = mul nsw i16 %X, C+1
+ //
+ // iff C+1 isn't INT_MIN
+ const APInt *CInt;
+ if (TopLevelOpcode == Instruction::Add &&
+ InnerOpcode == Instruction::Mul)
+ if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
+ BO->setHasNoSignedWrap(HasNSW);
+ }
+ }
+ }
+ return SimplifiedInst;
+}
+
+/// This tries to simplify binary operations which some other binary operation
+/// distributes over either by factorizing out common terms
+/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
+/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
+/// Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Factorization.
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ auto TopLevelOpcode = I.getOpcode();
+ auto LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
+ auto RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
+
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ if (LHSOpcode == RHSOpcode) {
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
+ return V;
+ }
+
+ // The instruction has the form "(A op' B) op (C)". Try to factorize common
+ // term.
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
+ getIdentityValue(LHSOpcode, RHS)))
+ return V;
+
+ // The instruction has the form "(B) op (C op' D)". Try to factorize common
+ // term.
+ if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS,
+ getIdentityValue(RHSOpcode, LHS), C, D))
+ return V;
+
+ // Expansion.
+ if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+ // The instruction has the form "(A op' B) op C". See if expanding it out
+ // to "(A op C) op' (B op C)" results in simplifications.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) ||
+ (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+ return Op0;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
+ return V;
+ // Otherwise, create a new instruction.
+ C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+ }
+
+ if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+ // The instruction has the form "A op (B op' C)". See if expanding it out
+ // to "(A op B) op' (A op C)" results in simplifications.
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) ||
+ (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+ return Op1;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
+ return V;
+ // Otherwise, create a new instruction.
+ A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+ }
+
+ // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0))
+ // (op (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (op c, d)))
+ if (auto *SI0 = dyn_cast<SelectInst>(LHS)) {
+ if (auto *SI1 = dyn_cast<SelectInst>(RHS)) {
+ if (SI0->getCondition() == SI1->getCondition()) {
+ Value *SI = nullptr;
+ if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue(), DL, TLI, DT, AC))
+ SI = Builder->CreateSelect(SI0->getCondition(),
+ Builder->CreateBinOp(TopLevelOpcode,
+ SI0->getTrueValue(),
+ SI1->getTrueValue()),
+ V);
+ if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
+ SI1->getTrueValue(), DL, TLI, DT, AC))
+ SI = Builder->CreateSelect(
+ SI0->getCondition(), V,
+ Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue()));
+ if (SI) {
+ SI->takeName(&I);
+ return SI;
+ }
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
+/// constant zero (which is the 'negate' form).
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
+ if (C->getType()->getElementType()->isIntegerTy())
+ return ConstantExpr::getNeg(C);
+
+ return nullptr;
+}
+
+/// Given a 'fsub' instruction, return the RHS of the instruction if the LHS is
+/// a constant negative zero (which is the 'negate' form).
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+ if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
+ if (C->getType()->getElementType()->isFloatingPointTy())
+ return ConstantExpr::getFNeg(C);
+
+ return nullptr;
+}
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+ }
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) {
+ Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ Instruction *FPInst = dyn_cast<Instruction>(RI);
+ if (FPInst && isa<FPMathOperator>(FPInst))
+ FPInst->copyFastMathFlags(BO);
+ return RI;
+ }
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
+}
+
+/// Given an instruction with a select as one operand and a constant as the
+/// other operand, try to fold the binary operator into the select arguments.
+/// This also works for Cast instructions, which obviously do not have a second
+/// operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return nullptr;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->isIntegerTy(1)) return nullptr;
+
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+ VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr;
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return nullptr;
+ }
+
+ // Test if a CmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+ if (CI->hasOneUse()) {
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+ }
+ }
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+ return SelectInst::Create(SI->getCondition(),
+ SelectTrueVal, SelectFalseVal);
+ }
+ return nullptr;
+}
+
+/// Given a binary operator, cast instruction, or select which has a PHI node as
+/// operand #0, see if we can fold the instruction into the PHI (which is only
+/// possible if all operands to the PHI are constants).
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0)
+ return nullptr;
+
+ // We normally only transform phis with a single use. However, if a PHI has
+ // multiple uses and they are all the same operation, we can fold *all* of the
+ // uses into the PHI.
+ if (!PN->hasOneUse()) {
+ // Walk the use list for the instruction, comparing them to I.
+ for (User *U : PN->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI != &I && !I.isIdenticalTo(UI))
+ return nullptr;
+ }
+ // Otherwise, we can replace *all* users with the new PHI we form.
+ }
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
+ BasicBlock *NonConstBB = nullptr;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ continue;
+
+ if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
+ if (NonConstBB) return nullptr; // More than one non-const value.
+
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the InVal is an invoke at the end of the pred block, then we can't
+ // insert a computation after it without breaking the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == NonConstBB)
+ return nullptr;
+
+ // If the incoming non-constant value is in I's block, we will remove one
+ // instruction, but insert another equivalent one, leading to infinite
+ // instcombine.
+ if (isPotentiallyReachable(I.getParent(), NonConstBB, DT, LI))
+ return nullptr;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+ // inserting the computation on some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB != nullptr) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return nullptr;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // If we are going to have to insert a new computation, do so right before the
+ // predecessor's terminator.
+ if (NonConstBB)
+ Builder->SetInsertPoint(NonConstBB->getTerminator());
+
+ // Next, add all of the operands to the PHI.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We only currently try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = nullptr;
+ // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
+ // even if currently isNullValue gives false.
+ Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (InC && !isa<ConstantExpr>(InC))
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ else
+ InV = Builder->CreateSelect(PN->getIncomingValue(i),
+ TrueVInPred, FalseVInPred, "phitmp");
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = nullptr;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else if (isa<ICmpInst>(CI))
+ InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ else
+ InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = nullptr;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ else
+ InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ else
+ InV = Builder->CreateCast(CI->getOpcode(),
+ PN->getIncomingValue(i), I.getType(), "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+
+ for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (User == &I) continue;
+ replaceInstUsesWith(*User, NewPN);
+ eraseInstFromFunction(*User);
+ }
+ return replaceInstUsesWith(I, NewPN);
+}
+
+/// Given a pointer type and a constant offset, determine whether or not there
+/// is a sequence of GEP indices into the pointed type that will land us at the
+/// specified offset. If so, fill them into NewIndices and return the resultant
+/// element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value *> &NewIndices) {
+ Type *Ty = PtrTy->getElementType();
+ if (!Ty->isSized())
+ return nullptr;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ Type *IntPtrTy = DL.getIntPtrType(PtrTy);
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, set OrigBase to null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
+ return nullptr;
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return nullptr;
+ }
+ }
+
+ return Ty;
+}
+
+static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
+ !Src.hasOneUse())
+ return false;
+ return true;
+}
+
+/// Return a value X such that Val = X * Scale, or null if none.
+/// If the multiplication is known not to overflow, then NoSignedWrap is set.
+Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
+ assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
+ assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
+ Scale.getBitWidth() && "Scale not compatible with value!");
+
+ // If Val is zero or Scale is one then Val = Val * Scale.
+ if (match(Val, m_Zero()) || Scale == 1) {
+ NoSignedWrap = true;
+ return Val;
+ }
+
+ // If Scale is zero then it does not divide Val.
+ if (Scale.isMinValue())
+ return nullptr;
+
+ // Look through chains of multiplications, searching for a constant that is
+ // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
+ // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
+ // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
+ // down from Val:
+ //
+ // Val = M1 * X || Analysis starts here and works down
+ // M1 = M2 * Y || Doesn't descend into terms with more
+ // M2 = Z * 4 \/ than one use
+ //
+ // Then to modify a term at the bottom:
+ //
+ // Val = M1 * X
+ // M1 = Z * Y || Replaced M2 with Z
+ //
+ // Then to work back up correcting nsw flags.
+
+ // Op - the term we are currently analyzing. Starts at Val then drills down.
+ // Replaced with its descaled value before exiting from the drill down loop.
+ Value *Op = Val;
+
+ // Parent - initially null, but after drilling down notes where Op came from.
+ // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
+ // 0'th operand of Val.
+ std::pair<Instruction*, unsigned> Parent;
+
+ // Set if the transform requires a descaling at deeper levels that doesn't
+ // overflow.
+ bool RequireNoSignedWrap = false;
+
+ // Log base 2 of the scale. Negative if not a power of 2.
+ int32_t logScale = Scale.exactLogBase2();
+
+ for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // If Op is a constant divisible by Scale then descale to the quotient.
+ APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
+ APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
+ if (!Remainder.isMinValue())
+ // Not divisible by Scale.
+ return nullptr;
+ // Replace with the quotient in the parent.
+ Op = ConstantInt::get(CI->getType(), Quotient);
+ NoSignedWrap = true;
+ break;
+ }
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
+
+ if (BO->getOpcode() == Instruction::Mul) {
+ // Multiplication.
+ NoSignedWrap = BO->hasNoSignedWrap();
+ if (RequireNoSignedWrap && !NoSignedWrap)
+ return nullptr;
+
+ // There are three cases for multiplication: multiplication by exactly
+ // the scale, multiplication by a constant different to the scale, and
+ // multiplication by something else.
+ Value *LHS = BO->getOperand(0);
+ Value *RHS = BO->getOperand(1);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Multiplication by a constant.
+ if (CI->getValue() == Scale) {
+ // Multiplication by exactly the scale, replace the multiplication
+ // by its left-hand side in the parent.
+ Op = LHS;
+ break;
+ }
+
+ // Otherwise drill down into the constant.
+ if (!Op->hasOneUse())
+ return nullptr;
+
+ Parent = std::make_pair(BO, 1);
+ continue;
+ }
+
+ // Multiplication by something else. Drill down into the left-hand side
+ // since that's where the reassociate pass puts the good stuff.
+ if (!Op->hasOneUse())
+ return nullptr;
+
+ Parent = std::make_pair(BO, 0);
+ continue;
+ }
+
+ if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BO->getOperand(1))) {
+ // Multiplication by a power of 2.
+ NoSignedWrap = BO->hasNoSignedWrap();
+ if (RequireNoSignedWrap && !NoSignedWrap)
+ return nullptr;
+
+ Value *LHS = BO->getOperand(0);
+ int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
+ getLimitedValue(Scale.getBitWidth());
+ // Op = LHS << Amt.
+
+ if (Amt == logScale) {
+ // Multiplication by exactly the scale, replace the multiplication
+ // by its left-hand side in the parent.
+ Op = LHS;
+ break;
+ }
+ if (Amt < logScale || !Op->hasOneUse())
+ return nullptr;
+
+ // Multiplication by more than the scale. Reduce the multiplying amount
+ // by the scale in the parent.
+ Parent = std::make_pair(BO, 1);
+ Op = ConstantInt::get(BO->getType(), Amt - logScale);
+ break;
+ }
+ }
+
+ if (!Op->hasOneUse())
+ return nullptr;
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
+ if (Cast->getOpcode() == Instruction::SExt) {
+ // Op is sign-extended from a smaller type, descale in the smaller type.
+ unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ APInt SmallScale = Scale.trunc(SmallSize);
+ // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
+ // descale Op as (sext Y) * Scale. In order to have
+ // sext (Y * SmallScale) = (sext Y) * Scale
+ // some conditions need to hold however: SmallScale must sign-extend to
+ // Scale and the multiplication Y * SmallScale should not overflow.
+ if (SmallScale.sext(Scale.getBitWidth()) != Scale)
+ // SmallScale does not sign-extend to Scale.
+ return nullptr;
+ assert(SmallScale.exactLogBase2() == logScale);
+ // Require that Y * SmallScale must not overflow.
+ RequireNoSignedWrap = true;
+
+ // Drill down through the cast.
+ Parent = std::make_pair(Cast, 0);
+ Scale = SmallScale;
+ continue;
+ }
+
+ if (Cast->getOpcode() == Instruction::Trunc) {
+ // Op is truncated from a larger type, descale in the larger type.
+ // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
+ // trunc (Y * sext Scale) = (trunc Y) * Scale
+ // always holds. However (trunc Y) * Scale may overflow even if
+ // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
+ // from this point up in the expression (see later).
+ if (RequireNoSignedWrap)
+ return nullptr;
+
+ // Drill down through the cast.
+ unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ Parent = std::make_pair(Cast, 0);
+ Scale = Scale.sext(LargeSize);
+ if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
+ logScale = -1;
+ assert(Scale.exactLogBase2() == logScale);
+ continue;
+ }
+ }
+
+ // Unsupported expression, bail out.
+ return nullptr;
+ }
+
+ // If Op is zero then Val = Op * Scale.
+ if (match(Op, m_Zero())) {
+ NoSignedWrap = true;
+ return Op;
+ }
+
+ // We know that we can successfully descale, so from here on we can safely
+ // modify the IR. Op holds the descaled version of the deepest term in the
+ // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
+ // not to overflow.
+
+ if (!Parent.first)
+ // The expression only had one term.
+ return Op;
+
+ // Rewrite the parent using the descaled version of its operand.
+ assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
+ assert(Op != Parent.first->getOperand(Parent.second) &&
+ "Descaling was a no-op?");
+ Parent.first->setOperand(Parent.second, Op);
+ Worklist.Add(Parent.first);
+
+ // Now work back up the expression correcting nsw flags. The logic is based
+ // on the following observation: if X * Y is known not to overflow as a signed
+ // multiplication, and Y is replaced by a value Z with smaller absolute value,
+ // then X * Z will not overflow as a signed multiplication either. As we work
+ // our way up, having NoSignedWrap 'true' means that the descaled value at the
+ // current level has strictly smaller absolute value than the original.
+ Instruction *Ancestor = Parent.first;
+ do {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
+ // If the multiplication wasn't nsw then we can't say anything about the
+ // value of the descaled multiplication, and we have to clear nsw flags
+ // from this point on up.
+ bool OpNoSignedWrap = BO->hasNoSignedWrap();
+ NoSignedWrap &= OpNoSignedWrap;
+ if (NoSignedWrap != OpNoSignedWrap) {
+ BO->setHasNoSignedWrap(NoSignedWrap);
+ Worklist.Add(Ancestor);
+ }
+ } else if (Ancestor->getOpcode() == Instruction::Trunc) {
+ // The fact that the descaled input to the trunc has smaller absolute
+ // value than the original input doesn't tell us anything useful about
+ // the absolute values of the truncations.
+ NoSignedWrap = false;
+ }
+ assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
+ "Failed to keep proper track of nsw flags while drilling down?");
+
+ if (Ancestor == Val)
+ // Got to the top, all done!
+ return Val;
+
+ // Move up one level in the expression.
+ assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
+ Ancestor = Ancestor->user_back();
+ } while (1);
+}
+
+/// \brief Creates node of binary operation with the same attributes as the
+/// specified one but with other operands.
+static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *B) {
+ Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+ // If LHS and RHS are constant, BO won't be a binary operator.
+ if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO))
+ NewBO->copyIRFlags(&Inst);
+ return BO;
+}
+
+/// \brief Makes transformation of binary operation specific for vector types.
+/// \param Inst Binary operator to transform.
+/// \return Pointer to node that must replace the original binary operator, or
+/// null pointer if no transformation was made.
+Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
+ if (!Inst.getType()->isVectorTy()) return nullptr;
+
+ // It may not be safe to reorder shuffles and things like div, urem, etc.
+ // because we may trap when executing those ops on unknown vector elements.
+ // See PR20059.
+ if (!isSafeToSpeculativelyExecute(&Inst))
+ return nullptr;
+
+ unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
+ Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+ assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
+ assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
+
+ // If both arguments of binary operation are shuffles, which use the same
+ // mask and shuffle within a single vector, it is worthwhile to move the
+ // shuffle after binary operation:
+ // Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
+ if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) {
+ ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS);
+ if (isa<UndefValue>(LShuf->getOperand(1)) &&
+ isa<UndefValue>(RShuf->getOperand(1)) &&
+ LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() &&
+ LShuf->getMask() == RShuf->getMask()) {
+ Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
+ RShuf->getOperand(0), Builder);
+ return Builder->CreateShuffleVector(NewBO,
+ UndefValue::get(NewBO->getType()), LShuf->getMask());
+ }
+ }
+
+ // If one argument is a shuffle within one vector, the other is a constant,
+ // try moving the shuffle after the binary operation.
+ ShuffleVectorInst *Shuffle = nullptr;
+ Constant *C1 = nullptr;
+ if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS);
+ if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
+ if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
+ if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
+ if (Shuffle && C1 &&
+ (isa<ConstantVector>(C1) || isa<ConstantDataVector>(C1)) &&
+ isa<UndefValue>(Shuffle->getOperand(1)) &&
+ Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
+ SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
+ // Find constant C2 that has property:
+ // shuffle(C2, ShMask) = C1
+ // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>)
+ // reorder is not possible.
+ SmallVector<Constant*, 16> C2M(VWidth,
+ UndefValue::get(C1->getType()->getScalarType()));
+ bool MayChange = true;
+ for (unsigned I = 0; I < VWidth; ++I) {
+ if (ShMask[I] >= 0) {
+ assert(ShMask[I] < (int)VWidth);
+ if (!isa<UndefValue>(C2M[ShMask[I]])) {
+ MayChange = false;
+ break;
+ }
+ C2M[ShMask[I]] = C1->getAggregateElement(I);
+ }
+ }
+ if (MayChange) {
+ Constant *C2 = ConstantVector::get(C2M);
+ Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0);
+ Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2;
+ Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
+ return Builder->CreateShuffleVector(NewBO,
+ UndefValue::get(Inst.getType()), Shuffle->getMask());
+ }
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, TLI, DT, AC))
+ return replaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ // Eliminate unneeded casts for indices, and replace indices which displace
+ // by multiples of a zero size type with zero.
+ bool MadeChange = false;
+ Type *IntPtrTy =
+ DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+ ++I, ++GTI) {
+ // Skip indices into struct types.
+ if (isa<StructType>(*GTI))
+ continue;
+
+ // Index type should have the same width as IntPtr
+ Type *IndexTy = (*I)->getType();
+ Type *NewIndexType = IndexTy->isVectorTy() ?
+ VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy;
+
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ Type *EltTy = GTI.getIndexedType();
+ if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(NewIndexType);
+ MadeChange = true;
+ }
+
+ if (IndexTy != NewIndexType) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, NewIndexType, true);
+ MadeChange = true;
+ }
+ }
+ if (MadeChange)
+ return &GEP;
+
+ // Check to see if the inputs to the PHI node are getelementptr instructions.
+ if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
+ GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
+ if (!Op1)
+ return nullptr;
+
+ // Don't fold a GEP into itself through a PHI node. This can only happen
+ // through the back-edge of a loop. Folding a GEP into itself means that
+ // the value of the previous iteration needs to be stored in the meantime,
+ // thus requiring an additional register variable to be live, but not
+ // actually achieving anything (the GEP still needs to be executed once per
+ // loop iteration).
+ if (Op1 == &GEP)
+ return nullptr;
+
+ int DI = -1;
+
+ for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
+ GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I);
+ if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
+ return nullptr;
+
+ // As for Op1 above, don't try to fold a GEP into itself.
+ if (Op2 == &GEP)
+ return nullptr;
+
+ // Keep track of the type as we walk the GEP.
+ Type *CurTy = nullptr;
+
+ for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
+ if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
+ return nullptr;
+
+ if (Op1->getOperand(J) != Op2->getOperand(J)) {
+ if (DI == -1) {
+ // We have not seen any differences yet in the GEPs feeding the
+ // PHI yet, so we record this one if it is allowed to be a
+ // variable.
+
+ // The first two arguments can vary for any GEP, the rest have to be
+ // static for struct slots
+ if (J > 1 && CurTy->isStructTy())
+ return nullptr;
+
+ DI = J;
+ } else {
+ // The GEP is different by more than one input. While this could be
+ // extended to support GEPs that vary by more than one variable it
+ // doesn't make sense since it greatly increases the complexity and
+ // would result in an R+R+R addressing mode which no backend
+ // directly supports and would need to be broken into several
+ // simpler instructions anyway.
+ return nullptr;
+ }
+ }
+
+ // Sink down a layer of the type for the next iteration.
+ if (J > 0) {
+ if (J == 1) {
+ CurTy = Op1->getSourceElementType();
+ } else if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) {
+ CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
+ } else {
+ CurTy = nullptr;
+ }
+ }
+ }
+ }
+
+ // If not all GEPs are identical we'll have to create a new PHI node.
+ // Check that the old PHI node has only one use so that it will get
+ // removed.
+ if (DI != -1 && !PN->hasOneUse())
+ return nullptr;
+
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+ if (DI == -1) {
+ // All the GEPs feeding the PHI are identical. Clone one down into our
+ // BB so that it can be merged with the current GEP.
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
+ } else {
+ // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
+ // into the current block so it can be merged, and create a new PHI to
+ // set that index.
+ PHINode *NewPN;
+ {
+ IRBuilderBase::InsertPointGuard Guard(*Builder);
+ Builder->SetInsertPoint(PN);
+ NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
+ PN->getNumOperands());
+ }
+
+ for (auto &I : PN->operands())
+ NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
+ PN->getIncomingBlock(I));
+
+ NewGEP->setOperand(DI, NewPN);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
+ NewGEP->setOperand(DI, NewPN);
+ }
+
+ GEP.setOperand(0, NewGEP);
+ PtrOp = NewGEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+ if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
+ return nullptr;
+
+ // Note that if our source is a gep chain itself then we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids us creating a TON of code in some cases.
+ if (GEPOperator *SrcGEP =
+ dyn_cast<GEPOperator>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
+ return nullptr; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !(*I)->isStructTy();
+
+ // Can we combine the two pointer arithmetics offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return nullptr;
+ // Only do the combine when GO1 and SO1 are both constants. Only in
+ // this case, we are sure the cost after the merge is never more than
+ // that before the merge.
+ if (!isa<Constant>(GO1) || !isa<Constant>(SO1))
+ return nullptr;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is a zero
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return GEP.isInBounds() && Src->isInBounds()
+ ? GetElementPtrInst::CreateInBounds(
+ Src->getSourceElementType(), Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
+ }
+
+ if (GEP.getNumIndices() == 1) {
+ unsigned AS = GEP.getPointerAddressSpace();
+ if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = GEP.getSourceElementType();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
+
+ bool Matched = false;
+ uint64_t C;
+ Value *V = nullptr;
+ if (TyAllocSize == 1) {
+ V = GEP.getOperand(1);
+ Matched = true;
+ } else if (match(GEP.getOperand(1),
+ m_AShr(m_Value(V), m_ConstantInt(C)))) {
+ if (TyAllocSize == 1ULL << C)
+ Matched = true;
+ } else if (match(GEP.getOperand(1),
+ m_SDiv(m_Value(V), m_ConstantInt(C)))) {
+ if (TyAllocSize == C)
+ Matched = true;
+ }
+
+ if (Matched) {
+ // Canonicalize (gep i8* X, -(ptrtoint Y))
+ // to (inttoptr (sub (ptrtoint X), (ptrtoint Y)))
+ // The GEP pattern is emitted by the SCEV expander for certain kinds of
+ // pointer arithmetic.
+ if (match(V, m_Neg(m_PtrToInt(m_Value())))) {
+ Operator *Index = cast<Operator>(V);
+ Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ }
+ // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
+ // to (bitcast Y)
+ Value *Y;
+ if (match(V, m_Sub(m_PtrToInt(m_Value(Y)),
+ m_PtrToInt(m_Specific(GEP.getOperand(0)))))) {
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y,
+ GEP.getType());
+ }
+ }
+ }
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+
+ // We do not handle pointer-vector geps here.
+ if (!StrippedPtrTy)
+ return nullptr;
+
+ if (StrippedPtr != PtrOp) {
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ if (ArrayType *CATy =
+ dyn_cast<ArrayType>(GEP.getSourceElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res = GetElementPtrInst::Create(
+ StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+ return Res;
+ // Insert Res, and create an addrspacecast.
+ // e.g.,
+ // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+ // ->
+ // %0 = GEP i8 addrspace(1)* X, ...
+ // addrspacecast i8 addrspace(1)* %0 to i8*
+ return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
+ }
+
+ if (ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+ GEP.setOperand(0, StrippedPtr);
+ GEP.setSourceElementType(XATy);
+ return &GEP;
+ }
+ // Cannot replace the base pointer directly because StrippedPtr's
+ // address space is different. Instead, create a new GEP followed by
+ // an addrspacecast.
+ // e.g.,
+ // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+ // i32 0, ...
+ // ->
+ // %0 = GEP [10 x i8] addrspace(1)* X, ...
+ // addrspacecast i8 addrspace(1)* %0 to i8*
+ SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+ Value *NewGEP = GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(
+ nullptr, StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName());
+ return new AddrSpaceCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ Type *SrcElTy = StrippedPtrTy->getElementType();
+ Type *ResElTy = GEP.getSourceElementType();
+ if (SrcElTy->isArrayTy() &&
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL.getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL.getIntPtrType(GEP.getType());
+ Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
+
+ // V and GEP are both pointer types --> BitCast
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
+ GEP.getType());
+ }
+
+ // Transform things like:
+ // %V = mul i64 %N, 4
+ // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
+ // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
+ if (ResElTy->isSized() && SrcElTy->isSized()) {
+ // Check that changing the type amounts to dividing the index by a scale
+ // factor.
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
+ if (ResSize && SrcSize % ResSize == 0) {
+ Value *Idx = GEP.getOperand(1);
+ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+ uint64_t Scale = SrcSize / ResSize;
+
+ // Earlier transforms ensure that the index has type IntPtrType, which
+ // considerably simplifies the logic by eliminating implicit casts.
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
+ "Index not cast to pointer width?");
+
+ bool NSW;
+ if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+ // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+ // If the multiplication NewIdx * Scale may overflow then the new
+ // GEP may not be "inbounds".
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName());
+
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
+ GEP.getType());
+ }
+ }
+ }
+
+ // Similarly, transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+ if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
+ // Check that changing to the array element type amounts to dividing the
+ // index by a scale factor.
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType());
+ if (ResSize && ArrayEltSize % ResSize == 0) {
+ Value *Idx = GEP.getOperand(1);
+ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+ uint64_t Scale = ArrayEltSize / ResSize;
+
+ // Earlier transforms ensure that the index has type IntPtrType, which
+ // considerably simplifies the logic by eliminating implicit casts.
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
+ "Index not cast to pointer width?");
+
+ bool NSW;
+ if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+ // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+ // If the multiplication NewIdx * Scale may overflow then the new
+ // GEP may not be "inbounds".
+ Value *Off[2] = {
+ Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
+ NewIdx};
+
+ Value *NewGEP = GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(
+ SrcElTy, StrippedPtr, Off, GEP.getName())
+ : Builder->CreateGEP(SrcElTy, StrippedPtr, Off,
+ GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
+ GEP.getType());
+ }
+ }
+ }
+ }
+ }
+
+ // addrspacecast between types is canonicalized as a bitcast, then an
+ // addrspacecast. To take advantage of the below bitcast + struct GEP, look
+ // through the addrspacecast.
+ if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
+ // X = bitcast A addrspace(1)* to B addrspace(1)*
+ // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
+ // Z = gep Y, <...constant indices...>
+ // Into an addrspacecasted GEP of the struct.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
+ PtrOp = BC;
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ Value *Operand = BCI->getOperand(0);
+ PointerType *OpType = cast<PointerType>(Operand->getType());
+ unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
+ APInt Offset(OffsetBits, 0);
+ if (!isa<BitCastInst>(Operand) &&
+ GEP.accumulateConstantOffset(DL, Offset)) {
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (!Offset) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
+ replaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+
+ if (Operand->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+ return new AddrSpaceCastInst(Operand, GEP.getType());
+ return new BitCastInst(Operand, GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices)
+ : Builder->CreateGEP(nullptr, Operand, NewIndices);
+
+ if (NGEP->getType() == GEP.getType())
+ return replaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+
+ if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+ return new AddrSpaceCastInst(NGEP, GEP.getType());
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
+ Instruction *AI) {
+ if (isa<ConstantPointerNull>(V))
+ return true;
+ if (auto *LI = dyn_cast<LoadInst>(V))
+ return isa<GlobalVariable>(LI->getPointerOperand());
+ // Two distinct allocations will never be equal.
+ // We rely on LookThroughBitCast in isAllocLikeFn being false, since looking
+ // through bitcasts of V can cause
+ // the result statement below to be true, even when AI and V (ex:
+ // i8* ->i32* ->i8* of AI) are the same allocations.
+ return isAllocLikeFn(V, TLI) && V != AI;
+}
+
+static bool
+isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Instruction*, 4> Worklist;
+ Worklist.push_back(AI);
+
+ do {
+ Instruction *PI = Worklist.pop_back_val();
+ for (User *U : PI->users()) {
+ Instruction *I = cast<Instruction>(U);
+ switch (I->getOpcode()) {
+ default:
+ // Give up the moment we see something we can't handle.
+ return false;
+
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ Users.emplace_back(I);
+ Worklist.push_back(I);
+ continue;
+
+ case Instruction::ICmp: {
+ ICmpInst *ICI = cast<ICmpInst>(I);
+ // We can fold eq/ne comparisons with null to false/true, respectively.
+ // We also fold comparisons in some conditions provided the alloc has
+ // not escaped (see isNeverEqualToUnescapedAlloc).
+ if (!ICI->isEquality())
+ return false;
+ unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
+ if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
+ return false;
+ Users.emplace_back(I);
+ continue;
+ }
+
+ case Instruction::Call:
+ // Ignore no-op and store intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset: {
+ MemIntrinsic *MI = cast<MemIntrinsic>(II);
+ if (MI->isVolatile() || MI->getRawDest() != PI)
+ return false;
+ }
+ // fall through
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ Users.emplace_back(I);
+ continue;
+ }
+ }
+
+ if (isFreeCall(I, TLI)) {
+ Users.emplace_back(I);
+ continue;
+ }
+ return false;
+
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ if (SI->isVolatile() || SI->getPointerOperand() != PI)
+ return false;
+ Users.emplace_back(I);
+ continue;
+ }
+ }
+ llvm_unreachable("missing a return?");
+ }
+ } while (!Worklist.empty());
+ return true;
+}
+
+Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
+ // If we have a malloc call which is only used in any amount of comparisons
+ // to null and free calls, delete the calls and replace the comparisons with
+ // true or false as appropriate.
+ SmallVector<WeakVH, 64> Users;
+ if (isAllocSiteRemovable(&MI, Users, TLI)) {
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ // Lowering all @llvm.objectsize calls first because they may
+ // use a bitcast/GEP of the alloca we are removing.
+ if (!Users[i])
+ continue;
+
+ Instruction *I = cast<Instruction>(&*Users[i]);
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::objectsize) {
+ uint64_t Size;
+ if (!getObjectSize(II->getArgOperand(0), Size, DL, TLI)) {
+ ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
+ Size = CI->isZero() ? -1ULL : 0;
+ }
+ replaceInstUsesWith(*I, ConstantInt::get(I->getType(), Size));
+ eraseInstFromFunction(*I);
+ Users[i] = nullptr; // Skip examining in the next loop.
+ }
+ }
+ }
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ if (!Users[i])
+ continue;
+
+ Instruction *I = cast<Instruction>(&*Users[i]);
+
+ if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
+ replaceInstUsesWith(*C,
+ ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ replaceInstUsesWith(*I, UndefValue::get(I->getType()));
+ }
+ eraseInstFromFunction(*I);
+ }
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
+ // Replace invoke with a NOP intrinsic to maintain the original CFG
+ Module *M = II->getModule();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
+ InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
+ None, "", II->getParent());
+ }
+ return eraseInstFromFunction(MI);
+ }
+ return nullptr;
+}
+
+/// \brief Move the call to free before a NULL test.
+///
+/// Check if this free is accessed after its argument has been test
+/// against NULL (property 0).
+/// If yes, it is legal to move this call in its predecessor block.
+///
+/// The move is performed only if the block containing the call to free
+/// will be removed, i.e.:
+/// 1. it has only one predecessor P, and P has two successors
+/// 2. it contains the call and an unconditional branch
+/// 3. its successor is the same as its predecessor's successor
+///
+/// The profitability is out-of concern here and this function should
+/// be called only if the caller knows this transformation would be
+/// profitable (e.g., for code size).
+static Instruction *
+tryToMoveFreeBeforeNullTest(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+ BasicBlock *FreeInstrBB = FI.getParent();
+ BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
+
+ // Validate part of constraint #1: Only one predecessor
+ // FIXME: We can extend the number of predecessor, but in that case, we
+ // would duplicate the call to free in each predecessor and it may
+ // not be profitable even for code size.
+ if (!PredBB)
+ return nullptr;
+
+ // Validate constraint #2: Does this block contains only the call to
+ // free and an unconditional branch?
+ // FIXME: We could check if we can speculate everything in the
+ // predecessor block
+ if (FreeInstrBB->size() != 2)
+ return nullptr;
+ BasicBlock *SuccBB;
+ if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ return nullptr;
+
+ // Validate the rest of constraint #1 by matching on the pred branch.
+ TerminatorInst *TI = PredBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+ return nullptr;
+ if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
+ return nullptr;
+
+ // Validate constraint #3: Ensure the null case just falls through.
+ if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
+ return nullptr;
+ assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
+ "Broken CFG: missing edge from predecessor to successor");
+
+ FI.moveBefore(TI);
+ return &FI;
+}
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store to null because we cannot modify the CFG here.
+ Builder->CreateStore(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ return eraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null' delete the instruction. This can happen in stl code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return eraseInstFromFunction(FI);
+
+ // If we optimize for code size, try to move the call to free before the null
+ // test so that simplify cfg can remove the empty block and dead code
+ // elimination the branch. I.e., helps to turn something like:
+ // if (foo) free(foo);
+ // into
+ // free(foo);
+ if (MinimizeSize)
+ if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+ return I;
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
+ if (RI.getNumOperands() == 0) // ret void
+ return nullptr;
+
+ Value *ResultOp = RI.getOperand(0);
+ Type *VTy = ResultOp->getType();
+ if (!VTy->isIntegerTy())
+ return nullptr;
+
+ // There might be assume intrinsics dominating this return that completely
+ // determine the value. If so, constant fold it.
+ unsigned BitWidth = VTy->getPrimitiveSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI);
+ if ((KnownZero|KnownOne).isAllOnesValue())
+ RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne));
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = nullptr;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.swapSuccessors();
+ return &BI;
+ }
+
+ // If the condition is irrelevant, remove the use so that other
+ // transforms on the condition become more effective.
+ if (BI.isConditional() &&
+ BI.getSuccessor(0) == BI.getSuccessor(1) &&
+ !isa<UndefValue>(BI.getCondition())) {
+ BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
+ return &BI;
+ }
+
+ // Canonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.swapSuccessors();
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Canonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
+ BI.swapSuccessors();
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
+ unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
+ unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
+
+ // Compute the number of leading bits we can ignore.
+ // TODO: A better way to determine this would use ComputeNumSignBits().
+ for (auto &C : SI.cases()) {
+ LeadingKnownZeros = std::min(
+ LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
+ LeadingKnownOnes = std::min(
+ LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
+ }
+
+ unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
+
+ // Shrink the condition operand if the new type is smaller than the old type.
+ // This may produce a non-standard type for the switch, but that's ok because
+ // the backend should extend back to a legal type for the target.
+ bool TruncCond = false;
+ if (NewWidth > 0 && NewWidth < BitWidth) {
+ TruncCond = true;
+ IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
+ Builder->SetInsertPoint(&SI);
+ Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc");
+ SI.setCondition(NewCond);
+
+ for (auto &C : SI.cases())
+ static_cast<SwitchInst::CaseIt *>(&C)->setValue(ConstantInt::get(
+ SI.getContext(), C.getCaseValue()->getValue().trunc(NewWidth)));
+ }
+
+ ConstantInt *AddRHS = nullptr;
+ if (match(Cond, m_Add(m_Value(), m_ConstantInt(AddRHS)))) {
+ Instruction *I = cast<Instruction>(Cond);
+ // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e;
+ ++i) {
+ ConstantInt *CaseVal = i.getCaseValue();
+ Constant *LHS = CaseVal;
+ if (TruncCond) {
+ LHS = LeadingKnownZeros
+ ? ConstantExpr::getZExt(CaseVal, Cond->getType())
+ : ConstantExpr::getSExt(CaseVal, Cond->getType());
+ }
+ Constant *NewCaseVal = ConstantExpr::getSub(LHS, AddRHS);
+ assert(isa<ConstantInt>(NewCaseVal) &&
+ "Result of expression should be constant");
+ i.setValue(cast<ConstantInt>(NewCaseVal));
+ }
+ SI.setCondition(I->getOperand(0));
+ Worklist.Add(I);
+ return &SI;
+ }
+
+ return TruncCond ? &SI : nullptr;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return replaceInstUsesWith(EV, Agg);
+
+ if (Value *V =
+ SimplifyExtractValueInst(Agg, EV.getIndices(), DL, TLI, DT, AC))
+ return replaceInstUsesWith(EV, V);
+
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract both reference distinctly different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.getIndices());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.getIndices());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ makeArrayRef(insi, inse));
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ makeArrayRef(exti, exte));
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic, see if we're the only user, which
+ // allows us to simplify multiple result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ eraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+ return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+ ConstantExpr::getNot(CI));
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ eraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ eraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+ // If the (non-volatile) load only has one use, we can rewrite this to a
+ // load from a GEP. This reduces the size of the load. If a load is used
+ // only by extractvalue instructions then this either must have been
+ // optimized before, or it is a struct with padding, in which case we
+ // don't want to do the transformation as it loses padding knowledge.
+ if (L->isSimple() && L->hasOneUse()) {
+ // extractvalue has integer indices, getelementptr has Value*s. Convert.
+ SmallVector<Value*, 4> Indices;
+ // Prefix an i32 0 since we need the first element.
+ Indices.push_back(Builder->getInt32(0));
+ for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+ I != E; ++I)
+ Indices.push_back(Builder->getInt32(*I));
+
+ // We need to insert these at the location of the old load, not at that of
+ // the extractvalue.
+ Builder->SetInsertPoint(L);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
+ L->getPointerOperand(), Indices);
+ // Returning the load directly will cause the main loop to insert it in
+ // the wrong spot, so use replaceInstUsesWith().
+ return replaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ }
+ // We could simplify extracts from other values. Note that nested extracts may
+ // already be simplified implicitly by the above: extract (extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate. Similarly for extracts from single-use
+ // loads: extract (extract (load)) will be translated to extract (load (gep))
+ // and if again single-use then via load (gep (gep)) to load (gep).
+ // However, double extracts from e.g. function arguments or return values
+ // aren't handled yet.
+ return nullptr;
+}
+
+/// Return 'true' if the given typeinfo will match anything.
+static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
+ switch (Personality) {
+ case EHPersonality::GNU_C:
+ case EHPersonality::GNU_C_SjLj:
+ case EHPersonality::Rust:
+ // The GCC C EH and Rust personality only exists to support cleanups, so
+ // it's not clear what the semantics of catch clauses are.
+ return false;
+ case EHPersonality::Unknown:
+ return false;
+ case EHPersonality::GNU_Ada:
+ // While __gnat_all_others_value will match any Ada exception, it doesn't
+ // match foreign exceptions (or didn't, before gcc-4.7).
+ return false;
+ case EHPersonality::GNU_CXX:
+ case EHPersonality::GNU_CXX_SjLj:
+ case EHPersonality::GNU_ObjC:
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_CXX:
+ case EHPersonality::CoreCLR:
+ return TypeInfo->isNullValue();
+ }
+ llvm_unreachable("invalid enum");
+}
+
+static bool shorter_filter(const Value *LHS, const Value *RHS) {
+ return
+ cast<ArrayType>(LHS->getType())->getNumElements()
+ <
+ cast<ArrayType>(RHS->getType())->getNumElements();
+}
+
+Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
+ // The logic here should be correct for any real-world personality function.
+ // However if that turns out not to be true, the offending logic can always
+ // be conditioned on the personality function, like the catch-all logic is.
+ EHPersonality Personality =
+ classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
+
+ // Simplify the list of clauses, eg by removing repeated catch clauses
+ // (these are often created by inlining).
+ bool MakeNewInstruction = false; // If true, recreate using the following:
+ SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
+ bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
+
+ SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
+ for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
+ bool isLastClause = i + 1 == e;
+ if (LI.isCatch(i)) {
+ // A catch clause.
+ Constant *CatchClause = LI.getClause(i);
+ Constant *TypeInfo = CatchClause->stripPointerCasts();
+
+ // If we already saw this clause, there is no point in having a second
+ // copy of it.
+ if (AlreadyCaught.insert(TypeInfo).second) {
+ // This catch clause was not already seen.
+ NewClauses.push_back(CatchClause);
+ } else {
+ // Repeated catch clause - drop the redundant copy.
+ MakeNewInstruction = true;
+ }
+
+ // If this is a catch-all then there is no point in keeping any following
+ // clauses or marking the landingpad as having a cleanup.
+ if (isCatchAll(Personality, TypeInfo)) {
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+ } else {
+ // A filter clause. If any of the filter elements were already caught
+ // then they can be dropped from the filter. It is tempting to try to
+ // exploit the filter further by saying that any typeinfo that does not
+ // occur in the filter can't be caught later (and thus can be dropped).
+ // However this would be wrong, since typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some
+ // class derived from it).
+ assert(LI.isFilter(i) && "Unsupported landingpad clause!");
+ Constant *FilterClause = LI.getClause(i);
+ ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
+ unsigned NumTypeInfos = FilterType->getNumElements();
+
+ // An empty filter catches everything, so there is no point in keeping any
+ // following clauses or marking the landingpad as having a cleanup. By
+ // dealing with this case here the following code is made a bit simpler.
+ if (!NumTypeInfos) {
+ NewClauses.push_back(FilterClause);
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+
+ bool MakeNewFilter = false; // If true, make a new filter.
+ SmallVector<Constant *, 16> NewFilterElts; // New elements.
+ if (isa<ConstantAggregateZero>(FilterClause)) {
+ // Not an empty filter - it contains at least one null typeinfo.
+ assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
+ Constant *TypeInfo =
+ Constant::getNullValue(FilterType->getElementType());
+ // If this typeinfo is a catch-all then the filter can never match.
+ if (isCatchAll(Personality, TypeInfo)) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // There is no point in having multiple copies of this typeinfo, so
+ // discard all but the first copy if there is more than one.
+ NewFilterElts.push_back(TypeInfo);
+ if (NumTypeInfos > 1)
+ MakeNewFilter = true;
+ } else {
+ ConstantArray *Filter = cast<ConstantArray>(FilterClause);
+ SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
+ NewFilterElts.reserve(NumTypeInfos);
+
+ // Remove any filter elements that were already caught or that already
+ // occurred in the filter. While there, see if any of the elements are
+ // catch-alls. If so, the filter can be discarded.
+ bool SawCatchAll = false;
+ for (unsigned j = 0; j != NumTypeInfos; ++j) {
+ Constant *Elt = Filter->getOperand(j);
+ Constant *TypeInfo = Elt->stripPointerCasts();
+ if (isCatchAll(Personality, TypeInfo)) {
+ // This element is a catch-all. Bail out, noting this fact.
+ SawCatchAll = true;
+ break;
+ }
+
+ // Even if we've seen a type in a catch clause, we don't want to
+ // remove it from the filter. An unexpected type handler may be
+ // set up for a call site which throws an exception of the same
+ // type caught. In order for the exception thrown by the unexpected
+ // handler to propogate correctly, the filter must be correctly
+ // described for the call site.
+ //
+ // Example:
+ //
+ // void unexpected() { throw 1;}
+ // void foo() throw (int) {
+ // std::set_unexpected(unexpected);
+ // try {
+ // throw 2.0;
+ // } catch (int i) {}
+ // }
+
+ // There is no point in having multiple copies of the same typeinfo in
+ // a filter, so only add it if we didn't already.
+ if (SeenInFilter.insert(TypeInfo).second)
+ NewFilterElts.push_back(cast<Constant>(Elt));
+ }
+ // A filter containing a catch-all cannot match anything by definition.
+ if (SawCatchAll) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // If we dropped something from the filter, make a new one.
+ if (NewFilterElts.size() < NumTypeInfos)
+ MakeNewFilter = true;
+ }
+ if (MakeNewFilter) {
+ FilterType = ArrayType::get(FilterType->getElementType(),
+ NewFilterElts.size());
+ FilterClause = ConstantArray::get(FilterType, NewFilterElts);
+ MakeNewInstruction = true;
+ }
+
+ NewClauses.push_back(FilterClause);
+
+ // If the new filter is empty then it will catch everything so there is
+ // no point in keeping any following clauses or marking the landingpad
+ // as having a cleanup. The case of the original filter being empty was
+ // already handled above.
+ if (MakeNewFilter && !NewFilterElts.size()) {
+ assert(MakeNewInstruction && "New filter but not a new instruction!");
+ CleanupFlag = false;
+ break;
+ }
+ }
+ }
+
+ // If several filters occur in a row then reorder them so that the shortest
+ // filters come first (those with the smallest number of elements). This is
+ // advantageous because shorter filters are more likely to match, speeding up
+ // unwinding, but mostly because it increases the effectiveness of the other
+ // filter optimizations below.
+ for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
+ unsigned j;
+ // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
+ for (j = i; j != e; ++j)
+ if (!isa<ArrayType>(NewClauses[j]->getType()))
+ break;
+
+ // Check whether the filters are already sorted by length. We need to know
+ // if sorting them is actually going to do anything so that we only make a
+ // new landingpad instruction if it does.
+ for (unsigned k = i; k + 1 < j; ++k)
+ if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
+ // Not sorted, so sort the filters now. Doing an unstable sort would be
+ // correct too but reordering filters pointlessly might confuse users.
+ std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
+ shorter_filter);
+ MakeNewInstruction = true;
+ break;
+ }
+
+ // Look for the next batch of filters.
+ i = j + 1;
+ }
+
+ // If typeinfos matched if and only if equal, then the elements of a filter L
+ // that occurs later than a filter F could be replaced by the intersection of
+ // the elements of F and L. In reality two typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some class
+ // derived from it) so it would be wrong to perform this transform in general.
+ // However the transform is correct and useful if F is a subset of L. In that
+ // case L can be replaced by F, and thus removed altogether since repeating a
+ // filter is pointless. So here we look at all pairs of filters F and L where
+ // L follows F in the list of clauses, and remove L if every element of F is
+ // an element of L. This can occur when inlining C++ functions with exception
+ // specifications.
+ for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
+ // Examine each filter in turn.
+ Value *Filter = NewClauses[i];
+ ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
+ if (!FTy)
+ // Not a filter - skip it.
+ continue;
+ unsigned FElts = FTy->getNumElements();
+ // Examine each filter following this one. Doing this backwards means that
+ // we don't have to worry about filters disappearing under us when removed.
+ for (unsigned j = NewClauses.size() - 1; j != i; --j) {
+ Value *LFilter = NewClauses[j];
+ ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
+ if (!LTy)
+ // Not a filter - skip it.
+ continue;
+ // If Filter is a subset of LFilter, i.e. every element of Filter is also
+ // an element of LFilter, then discard LFilter.
+ SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
+ // If Filter is empty then it is a subset of LFilter.
+ if (!FElts) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ // Move on to the next filter.
+ continue;
+ }
+ unsigned LElts = LTy->getNumElements();
+ // If Filter is longer than LFilter then it cannot be a subset of it.
+ if (FElts > LElts)
+ // Move on to the next filter.
+ continue;
+ // At this point we know that LFilter has at least one element.
+ if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
+ // Filter is a subset of LFilter iff Filter contains only zeros (as we
+ // already know that Filter is not longer than LFilter).
+ if (isa<ConstantAggregateZero>(Filter)) {
+ assert(FElts <= LElts && "Should have handled this case earlier!");
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ ConstantArray *LArray = cast<ConstantArray>(LFilter);
+ if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
+ // Since Filter is non-empty and contains only zeros, it is a subset of
+ // LFilter iff LFilter contains a zero.
+ assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
+ for (unsigned l = 0; l != LElts; ++l)
+ if (LArray->getOperand(l)->isNullValue()) {
+ // LFilter contains a zero - discard it.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ break;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ // At this point we know that both filters are ConstantArrays. Loop over
+ // operands to see whether every element of Filter is also an element of
+ // LFilter. Since filters tend to be short this is probably faster than
+ // using a method that scales nicely.
+ ConstantArray *FArray = cast<ConstantArray>(Filter);
+ bool AllFound = true;
+ for (unsigned f = 0; f != FElts; ++f) {
+ Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
+ AllFound = false;
+ for (unsigned l = 0; l != LElts; ++l) {
+ Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
+ if (LTypeInfo == FTypeInfo) {
+ AllFound = true;
+ break;
+ }
+ }
+ if (!AllFound)
+ break;
+ }
+ if (AllFound) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ }
+ }
+
+ // If we changed any of the clauses, replace the old landingpad instruction
+ // with a new one.
+ if (MakeNewInstruction) {
+ LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
+ NewClauses.size());
+ for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
+ NLI->addClause(NewClauses[i]);
+ // A landing pad with no clauses must have the cleanup flag set. It is
+ // theoretically possible, though highly unlikely, that we eliminated all
+ // clauses. If so, force the cleanup flag to true.
+ if (NewClauses.empty())
+ CleanupFlag = true;
+ NLI->setCleanup(CleanupFlag);
+ return NLI;
+ }
+
+ // Even if none of the clauses changed, we may nonetheless have understood
+ // that the cleanup flag is pointless. Clear it if so.
+ if (LI.isCleanup() != CleanupFlag) {
+ assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
+ LI.setCleanup(CleanupFlag);
+ return &LI;
+ }
+
+ return nullptr;
+}
+
+/// Try to move the specified instruction from its current block into the
+/// beginning of DestBlock, which can only happen if it's safe to move the
+/// instruction past all of the instructions between it and the end of its
+/// block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
+ isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // Do not sink into catchswitch blocks.
+ if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
+ return false;
+
+ // Do not sink convergent call instructions.
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isConvergent())
+ return false;
+ }
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I->getIterator(),
+ E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
+ I->moveBefore(&*InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+bool InstCombiner::run() {
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == nullptr) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I, TLI)) {
+ DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
+ eraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() &&
+ (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
+ if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ replaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ if (isInstructionTriviallyDead(I, TLI))
+ eraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+ }
+
+ // In general, it is possible for computeKnownBits to determine all bits in
+ // a value even when the operands are not all constants.
+ if (ExpensiveCombines && !I->use_empty() && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+ DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
+ " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ replaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ if (isInstructionTriviallyDead(I, TLI))
+ eraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(*I->user_begin());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(*I->use_begin());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+ // only has us as a predecessors (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor()) {
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ if (TryToSinkInstruction(I, UserParent)) {
+ DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+ MadeIRChange = true;
+ // We'll add uses of the sunk instruction below, but since sinking
+ // can expose opportunities for it's *operands* add them to the
+ // worklist
+ for (Use &U : I->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+ Worklist.Add(OpI);
+ }
+ }
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I);
+ Builder->SetCurrentDebugLocation(I->getDebugLoc());
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(dbgs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ if (I->getDebugLoc())
+ Result->setDebugLoc(I->getDebugLoc());
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I->getIterator();
+
+ // If we replace a PHI with something that isn't a PHI, fix up the
+ // insertion point.
+ if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos))
+ InsertPos = InstParent->getFirstInsertionPt();
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ eraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // if so, remove it.
+ if (isInstructionTriviallyDead(I, TLI)) {
+ eraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+/// Walk the function in depth-first order, adding all reachable code to the
+/// worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ InstCombineWorklist &ICWorklist,
+ const TargetLibraryInfo *TLI) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB).second)
+ continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = &*BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ ++NumDeadInst;
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() &&
+ (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
+ if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ if (isInstructionTriviallyDead(Inst, TLI))
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
+ ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == nullptr)
+ continue;
+
+ Constant *&FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getDefaultDest());
+ continue;
+ }
+ }
+
+ for (BasicBlock *SuccBB : TI->successors())
+ Worklist.push_back(SuccBB);
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist);
+
+ return MadeIRChange;
+}
+
+/// \brief Populate the IC worklist from a function, and prune any dead basic
+/// blocks discovered in the process.
+///
+/// This also does basic constant propagation and other forward fixing to make
+/// the combiner itself run much faster.
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
+ TargetLibraryInfo *TLI,
+ InstCombineWorklist &ICWorklist) {
+ bool MadeIRChange = false;
+
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock *, 32> Visited;
+ MadeIRChange |=
+ AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (BasicBlock &BB : F) {
+ if (Visited.count(&BB))
+ continue;
+
+ unsigned NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
+ MadeIRChange |= NumDeadInstInBB > 0;
+ NumDeadInst += NumDeadInstInBB;
+ }
+
+ return MadeIRChange;
+}
+
+static bool
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+ AliasAnalysis *AA, AssumptionCache &AC,
+ TargetLibraryInfo &TLI, DominatorTree &DT,
+ bool ExpensiveCombines = true,
+ LoopInfo *LI = nullptr) {
+ auto &DL = F.getParent()->getDataLayout();
+ ExpensiveCombines |= EnableExpensiveCombines;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<TargetFolder, InstCombineIRInserter> Builder(
+ F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, &AC));
+
+ // Lower dbg.declare intrinsics otherwise their value may be clobbered
+ // by instcombiner.
+ bool DbgDeclaresChanged = LowerDbgDeclare(F);
+
+ // Iterate while there is work to do.
+ int Iteration = 0;
+ for (;;) {
+ ++Iteration;
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getName() << "\n");
+
+ bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
+
+ InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines,
+ AA, &AC, &TLI, &DT, DL, LI);
+ Changed |= IC.run();
+
+ if (!Changed)
+ break;
+ }
+
+ return DbgDeclaresChanged || Iteration > 1;
+}
+
+PreservedAnalyses InstCombinePass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+
+ auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+
+ // FIXME: The AliasAnalysis is not yet supported in the new pass manager
+ if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT,
+ ExpensiveCombines, LI))
+ // No changes, all analyses are preserved.
+ return PreservedAnalyses::all();
+
+ // Mark all the analyses that instcombine updates as preserved.
+ // FIXME: This should also 'preserve the CFG'.
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+bool InstructionCombiningPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ // Required analyses.
+ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Optional analyses.
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+
+ return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT,
+ ExpensiveCombines, LI);
+}
+
+char InstructionCombiningPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstructionCombiningPassPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstructionCombiningPassPass(*unwrap(R));
+}
+
+FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
+ return new InstructionCombiningPass(ExpensiveCombines);
+}