diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine')
15 files changed, 20886 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h new file mode 100644 index 000000000000..a5eddc206134 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -0,0 +1,392 @@ +//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef INSTCOMBINE_INSTCOMBINE_H +#define INSTCOMBINE_INSTCOMBINE_H + +#include "InstCombineWorklist.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Support/TargetFolder.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" + +namespace llvm { + class CallSite; + class DataLayout; + class TargetLibraryInfo; + class DbgDeclareInst; + class MemIntrinsic; + class MemSetInst; + +/// SelectPatternFlavor - We can match a variety of different patterns for +/// select operations. +enum SelectPatternFlavor { + SPF_UNKNOWN = 0, + SPF_SMIN, SPF_UMIN, + SPF_SMAX, SPF_UMAX + //SPF_ABS - TODO. +}; + +/// getComplexity: Assign a complexity or rank value to LLVM Values... +/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst +static inline unsigned getComplexity(Value *V) { + if (isa<Instruction>(V)) { + if (BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || + BinaryOperator::isNot(V)) + return 3; + return 4; + } + if (isa<Argument>(V)) return 3; + return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; +} + + +/// InstCombineIRInserter - This is an IRBuilder insertion helper that works +/// just like the normal insertion helper, but also adds any new instructions +/// to the instcombine worklist. +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter + : public IRBuilderDefaultInserter<true> { + InstCombineWorklist &Worklist; +public: + InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} + + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock *BB, BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); + Worklist.Add(I); + } +}; + +/// InstCombiner - The -instcombine pass. +class LLVM_LIBRARY_VISIBILITY InstCombiner + : public FunctionPass, + public InstVisitor<InstCombiner, Instruction*> { + DataLayout *TD; + TargetLibraryInfo *TLI; + bool MadeIRChange; + LibCallSimplifier *Simplifier; + bool MinimizeSize; +public: + /// Worklist - All of the instructions that need to be simplified. + InstCombineWorklist Worklist; + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. + typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; + BuilderTy *Builder; + + static char ID; // Pass identification, replacement for typeid + InstCombiner() : FunctionPass(ID), TD(0), Builder(0) { + MinimizeSize = false; + initializeInstCombinerPass(*PassRegistry::getPassRegistry()); + } + +public: + virtual bool runOnFunction(Function &F); + + bool DoOneIteration(Function &F, unsigned ItNum); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + DataLayout *getDataLayout() const { return TD; } + + TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } + + // Visitation implementation - Implement instruction combining for different + // instruction types. The semantics are as follows: + // Return Value: + // null - No change was made + // I - Change was made, I is still valid, I may be dead though + // otherwise - Change was made, replace I with returned instruction + // + Instruction *visitAdd(BinaryOperator &I); + Instruction *visitFAdd(BinaryOperator &I); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty); + Instruction *visitSub(BinaryOperator &I); + Instruction *visitFSub(BinaryOperator &I); + Instruction *visitMul(BinaryOperator &I); + Value *foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, + Instruction *InsertBefore); + Instruction *visitFMul(BinaryOperator &I); + Instruction *visitURem(BinaryOperator &I); + Instruction *visitSRem(BinaryOperator &I); + Instruction *visitFRem(BinaryOperator &I); + bool SimplifyDivRemOfSelect(BinaryOperator &I); + Instruction *commonRemTransforms(BinaryOperator &I); + Instruction *commonIRemTransforms(BinaryOperator &I); + Instruction *commonDivTransforms(BinaryOperator &I); + Instruction *commonIDivTransforms(BinaryOperator &I); + Instruction *visitUDiv(BinaryOperator &I); + Instruction *visitSDiv(BinaryOperator &I); + Instruction *visitFDiv(BinaryOperator &I); + Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); + Instruction *visitAnd(BinaryOperator &I); + Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); + Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C); + Instruction *visitOr (BinaryOperator &I); + Instruction *visitXor(BinaryOperator &I); + Instruction *visitShl(BinaryOperator &I); + Instruction *visitAShr(BinaryOperator &I); + Instruction *visitLShr(BinaryOperator &I); + Instruction *commonShiftTransforms(BinaryOperator &I); + Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, + Constant *RHSC); + Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, + GlobalVariable *GV, CmpInst &ICI, + ConstantInt *AndCst = 0); + Instruction *visitFCmpInst(FCmpInst &I); + Instruction *visitICmpInst(ICmpInst &I); + Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHS, + ConstantInt *RHS); + Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); + Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); + Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred); + Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, Instruction &I); + Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I); + Instruction *commonCastTransforms(CastInst &CI); + Instruction *commonPointerCastTransforms(CastInst &CI); + Instruction *visitTrunc(TruncInst &CI); + Instruction *visitZExt(ZExtInst &CI); + Instruction *visitSExt(SExtInst &CI); + Instruction *visitFPTrunc(FPTruncInst &CI); + Instruction *visitFPExt(CastInst &CI); + Instruction *visitFPToUI(FPToUIInst &FI); + Instruction *visitFPToSI(FPToSIInst &FI); + Instruction *visitUIToFP(CastInst &CI); + Instruction *visitSIToFP(CastInst &CI); + Instruction *visitPtrToInt(PtrToIntInst &CI); + Instruction *visitIntToPtr(IntToPtrInst &CI); + Instruction *visitBitCast(BitCastInst &CI); + Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); + Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI); + Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, + Value *A, Value *B, Instruction &Outer, + SelectPatternFlavor SPF2, Value *C); + Instruction *visitSelectInst(SelectInst &SI); + Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); + Instruction *visitCallInst(CallInst &CI); + Instruction *visitInvokeInst(InvokeInst &II); + + Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); + Instruction *visitPHINode(PHINode &PN); + Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); + Instruction *visitAllocaInst(AllocaInst &AI); + Instruction *visitAllocSite(Instruction &FI); + Instruction *visitFree(CallInst &FI); + Instruction *visitLoadInst(LoadInst &LI); + Instruction *visitStoreInst(StoreInst &SI); + Instruction *visitBranchInst(BranchInst &BI); + Instruction *visitSwitchInst(SwitchInst &SI); + Instruction *visitInsertElementInst(InsertElementInst &IE); + Instruction *visitExtractElementInst(ExtractElementInst &EI); + Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); + Instruction *visitExtractValueInst(ExtractValueInst &EV); + Instruction *visitLandingPadInst(LandingPadInst &LI); + + // visitInstruction - Specify what to return for unhandled instructions... + Instruction *visitInstruction(Instruction &I) { return 0; } + +private: + bool ShouldChangeType(Type *From, Type *To) const; + Value *dyn_castNegVal(Value *V) const; + Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; + Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl<Value*> &NewIndices); + Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); + + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated and is interesting to optimize out. If + /// the cast can be eliminated by some other simple transformation, we prefer + /// to do the simplification first. + bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + Type *Ty); + + Instruction *visitCallSite(CallSite CS); + Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *TD); + bool transformConstExprCastCall(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); + Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, + bool DoXform = true); + Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); + bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); + Value *EmitGEPOffset(User *GEP); + Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); + Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask); + +public: + // InsertNewInstBefore - insert an instruction New before instruction Old + // in the program. Add the new instruction to the worklist. + // + Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { + assert(New && New->getParent() == 0 && + "New instruction already inserted into a basic block!"); + BasicBlock *BB = Old.getParent(); + BB->getInstList().insert(&Old, New); // Insert inst + Worklist.Add(New); + return New; + } + + // InsertNewInstWith - same as InsertNewInstBefore, but also sets the + // debug loc. + // + Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { + New->setDebugLoc(Old.getDebugLoc()); + return InsertNewInstBefore(New, Old); + } + + // ReplaceInstUsesWith - This method is to be used when an instruction is + // found to be dead, replacable with another preexisting expression. Here + // we add all uses of I to the worklist, replace all uses of I with the new + // value, then return I, so that the inst combiner will know that I was + // modified. + // + Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + + // If we are replacing the instruction with itself, this must be in a + // segment of unreachable code, so just clobber the instruction. + if (&I == V) + V = UndefValue::get(I.getType()); + + DEBUG(dbgs() << "IC: Replacing " << I << "\n" + " with " << *V << '\n'); + + I.replaceAllUsesWith(V); + return &I; + } + + // EraseInstFromFunction - When dealing with an instruction that has side + // effects or produces a void value, we can't rely on DCE to delete the + // instruction. Instead, visit methods should return the value returned by + // this function. + Instruction *EraseInstFromFunction(Instruction &I) { + DEBUG(dbgs() << "IC: ERASE " << I << '\n'); + + assert(I.use_empty() && "Cannot erase instruction that is used!"); + // Make sure that we reprocess all operands now that we reduced their + // use counts. + if (I.getNumOperands() < 8) { + for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(*i)) + Worklist.Add(Op); + } + Worklist.Remove(&I); + I.eraseFromParent(); + MadeIRChange = true; + return 0; // Don't do anything with FI + } + + void ComputeMaskedBits(Value *V, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0) const { + return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + } + + bool MaskedValueIsZero(Value *V, const APInt &Mask, + unsigned Depth = 0) const { + return llvm::MaskedValueIsZero(V, Mask, TD, Depth); + } + unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { + return llvm::ComputeNumSignBits(Op, TD, Depth); + } + +private: + + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for + /// operators which are associative or commutative. + bool SimplifyAssociativeOrCommutative(BinaryOperator &I); + + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations + /// which some other binary operation distributes over either by factorizing + /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this + /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is + /// a win). Returns the simplified value, or null if it didn't simplify. + Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); + + /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value + /// based on the demanded bits. + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth=0); + /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded + /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. + Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, + APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne); + + /// SimplifyDemandedInstructionBits - Inst is an integer instruction that + /// SimplifyDemandedBits knows about. See if the instruction has any + /// properties that allow us to simplify its operands. + bool SimplifyDemandedInstructionBits(Instruction &Inst); + + Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, + APInt& UndefElts, unsigned Depth = 0); + + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select + // which has a PHI node as operand #0, see if we can fold the instruction + // into the PHI (which is only possible if all operands to the PHI are + // constants). + // + Instruction *FoldOpIntoPhi(Instruction &I); + + // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" + // operator and they all are only used by the PHI, PHI together their + // inputs, and do the operation once, to the result of the PHI. + Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); + Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); + Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); + Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); + + + Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, + ConstantInt *AndRHS, BinaryOperator &TheAnd); + + Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, + bool isSub, Instruction &I); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside); + Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); + Instruction *MatchBSwap(BinaryOperator &I); + bool SimplifyStoreAtEndOfBlock(StoreInst &SI); + Instruction *SimplifyMemTransfer(MemIntrinsic *MI); + Instruction *SimplifyMemSet(MemSetInst *MI); + + + Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); + + /// Descale - Return a value X such that Val = X * Scale, or null if none. If + /// the multiplication is known not to overflow then NoSignedWrap is set. + Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); +}; + + + +} // end namespace llvm. + +#endif diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp new file mode 100644 index 000000000000..534feb8fad21 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -0,0 +1,1560 @@ +//===- InstCombineAddSub.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for add, fadd, sub, and fsub. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +namespace { + + /// Class representing coefficient of floating-point addend. + /// This class needs to be highly efficient, which is especially true for + /// the constructor. As of I write this comment, the cost of the default + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// perform write-merging). + /// + class FAddendCoef { + public: + // The constructor has to initialize a APFloat, which is uncessary for + // most addends which have coefficient either 1 or -1. So, the constructor + // is expensive. In order to avoid the cost of the constructor, we should + // reuse some instances whenever possible. The pre-created instances + // FAddCombine::Add[0-5] embodies this idea. + // + FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} + ~FAddendCoef(); + + void set(short C) { + assert(!insaneIntVal(C) && "Insane coefficient"); + IsFp = false; IntVal = C; + } + + void set(const APFloat& C); + + void negate(); + + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } + Value *getValue(Type *) const; + + // If possible, don't define operator+/operator- etc because these + // operators inevitably call FAddendCoef's constructor which is not cheap. + void operator=(const FAddendCoef &A); + void operator+=(const FAddendCoef &A); + void operator-=(const FAddendCoef &A); + void operator*=(const FAddendCoef &S); + + bool isOne() const { return isInt() && IntVal == 1; } + bool isTwo() const { return isInt() && IntVal == 2; } + bool isMinusOne() const { return isInt() && IntVal == -1; } + bool isMinusTwo() const { return isInt() && IntVal == -2; } + + private: + bool insaneIntVal(int V) { return V > 4 || V < -4; } + APFloat *getFpValPtr(void) + { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } + const APFloat *getFpValPtr(void) const + { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); } + + const APFloat &getFpVal(void) const { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + + APFloat &getFpVal(void) { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + + bool isInt() const { return !IsFp; } + + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); + private: + + bool IsFp; + + // True iff FpValBuf contains an instance of APFloat. + bool BufHasFpVal; + + // The integer coefficient of an individual addend is either 1 or -1, + // and we try to simplify at most 4 addends from neighboring at most + // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt + // is overkill of this end. + short IntVal; + + AlignedCharArrayUnion<APFloat> FpValBuf; + }; + + /// FAddend is used to represent floating-point addend. An addend is + /// represented as <C, V>, where the V is a symbolic value, and C is a + /// constant coefficient. A constant addend is represented as <C, 0>. + /// + class FAddend { + public: + FAddend() { Val = 0; } + + Value *getSymVal (void) const { return Val; } + const FAddendCoef &getCoef(void) const { return Coeff; } + + bool isConstant() const { return Val == 0; } + bool isZero() const { return Coeff.isZero(); } + + void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } + void set(const APFloat& Coefficient, Value *V) + { Coeff.set(Coefficient); Val = V; } + void set(const ConstantFP* Coefficient, Value *V) + { Coeff.set(Coefficient->getValueAPF()); Val = V; } + + void negate() { Coeff.negate(); } + + /// Drill down the U-D chain one step to find the definition of V, and + /// try to break the definition into one or two addends. + static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); + + /// Similar to FAddend::drillDownOneStep() except that the value being + /// splitted is the addend itself. + unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; + + void operator+=(const FAddend &T) { + assert((Val == T.Val) && "Symbolic-values disagree"); + Coeff += T.Coeff; + } + + private: + void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } + + // This addend has the value of "Coeff * Val". + Value *Val; + FAddendCoef Coeff; + }; + + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along + /// with its neighboring at most two instructions. + /// + class FAddCombine { + public: + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} + Value *simplify(Instruction *FAdd); + + private: + typedef SmallVector<const FAddend*, 4> AddendVect; + + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); + + Value *performFactorization(Instruction *I); + + /// Convert given addend to a Value + Value *createAddendVal(const FAddend &A, bool& NeedNeg); + + /// Return the number of instructions needed to emit the N-ary addition. + unsigned calcInstrNumber(const AddendVect& Vect); + Value *createFSub(Value *Opnd0, Value *Opnd1); + Value *createFAdd(Value *Opnd0, Value *Opnd1); + Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); + Value *createFNeg(Value *V); + Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); + void createInstPostProc(Instruction *NewInst); + + InstCombiner::BuilderTy *Builder; + Instruction *Instr; + + private: + // Debugging stuff are clustered here. + #ifndef NDEBUG + unsigned CreateInstrNum; + void initCreateInstNum() { CreateInstrNum = 0; } + void incCreateInstNum() { CreateInstrNum++; } + #else + void initCreateInstNum() {} + void incCreateInstNum() {} + #endif + }; +} + +//===----------------------------------------------------------------------===// +// +// Implementation of +// {FAddendCoef, FAddend, FAddition, FAddCombine}. +// +//===----------------------------------------------------------------------===// +FAddendCoef::~FAddendCoef() { + if (BufHasFpVal) + getFpValPtr()->~APFloat(); +} + +void FAddendCoef::set(const APFloat& C) { + APFloat *P = getFpValPtr(); + + if (isInt()) { + // As the buffer is meanless byte stream, we cannot call + // APFloat::operator=(). + new(P) APFloat(C); + } else + *P = C; + + IsFp = BufHasFpVal = true; +} + +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; +} + +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { + if (That.isInt()) + set(That.IntVal); + else + set(That.getFpVal()); +} + +void FAddendCoef::operator+=(const FAddendCoef &That) { + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; + if (isInt() == That.isInt()) { + if (isInt()) + IntVal += That.IntVal; + else + getFpVal().add(That.getFpVal(), RndMode); + return; + } + + if (isInt()) { + const APFloat &T = That.getFpVal(); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); + return; + } + + APFloat &T = getFpVal(); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); +} + +void FAddendCoef::operator-=(const FAddendCoef &That) { + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; + if (isInt() == That.isInt()) { + if (isInt()) + IntVal -= That.IntVal; + else + getFpVal().subtract(That.getFpVal(), RndMode); + return; + } + + if (isInt()) { + const APFloat &T = That.getFpVal(); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); + return; + } + + APFloat &T = getFpVal(); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); +} + +void FAddendCoef::operator*=(const FAddendCoef &That) { + if (That.isOne()) + return; + + if (That.isMinusOne()) { + negate(); + return; + } + + if (isInt() && That.isInt()) { + int Res = IntVal * (int)That.IntVal; + assert(!insaneIntVal(Res) && "Insane int value"); + IntVal = Res; + return; + } + + const fltSemantics &Semantic = + isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); + + if (isInt()) + convertToFpType(Semantic); + APFloat &F0 = getFpVal(); + + if (That.isInt()) + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); + else + F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); + + return; +} + +void FAddendCoef::negate() { + if (isInt()) + IntVal = 0 - IntVal; + else + getFpVal().changeSign(); +} + +Value *FAddendCoef::getValue(Type *Ty) const { + return isInt() ? + ConstantFP::get(Ty, float(IntVal)) : + ConstantFP::get(Ty->getContext(), getFpVal()); +} + +// The definition of <Val> Addends +// ========================================= +// A + B <1, A>, <1,B> +// A - B <1, A>, <1,B> +// 0 - B <-1, B> +// C * A, <C, A> +// A + C <1, A> <C, NULL> +// 0 +/- 0 <0, NULL> (corner case) +// +// Legend: A and B are not constant, C is constant +// +unsigned FAddend::drillValueDownOneStep + (Value *Val, FAddend &Addend0, FAddend &Addend1) { + Instruction *I = 0; + if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) + return 0; + + unsigned Opcode = I->getOpcode(); + + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { + ConstantFP *C0, *C1; + Value *Opnd0 = I->getOperand(0); + Value *Opnd1 = I->getOperand(1); + if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) + Opnd0 = 0; + + if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) + Opnd1 = 0; + + if (Opnd0) { + if (!C0) + Addend0.set(1, Opnd0); + else + Addend0.set(C0, 0); + } + + if (Opnd1) { + FAddend &Addend = Opnd0 ? Addend1 : Addend0; + if (!C1) + Addend.set(1, Opnd1); + else + Addend.set(C1, 0); + if (Opcode == Instruction::FSub) + Addend.negate(); + } + + if (Opnd0 || Opnd1) + return Opnd0 && Opnd1 ? 2 : 1; + + // Both operands are zero. Weird! + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); + return 1; + } + + if (I->getOpcode() == Instruction::FMul) { + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { + Addend0.set(C, V1); + return 1; + } + + if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { + Addend0.set(C, V0); + return 1; + } + } + + return 0; +} + +// Try to break *this* addend into two addends. e.g. Suppose this addend is +// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, +// i.e. <2.3, X> and <2.3, Y>. +// +unsigned FAddend::drillAddendDownOneStep + (FAddend &Addend0, FAddend &Addend1) const { + if (isConstant()) + return 0; + + unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); + if (!BreakNum || Coeff.isOne()) + return BreakNum; + + Addend0.Scale(Coeff); + + if (BreakNum == 2) + Addend1.Scale(Coeff); + + return BreakNum; +} + +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + +Value *FAddCombine::simplify(Instruction *I) { + assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); + + // Currently we are not able to handle vector type. + if (I->getType()->isVectorTy()) + return 0; + + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + // Save the instruction before calling other member-functions. + Instr = I; + + FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; + + unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); + + // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. + unsigned Opnd0_ExpNum = 0; + unsigned Opnd1_ExpNum = 0; + + if (!Opnd0.isConstant()) + Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); + + // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. + if (OpndNum == 2 && !Opnd1.isConstant()) + Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); + + // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 + if (Opnd0_ExpNum && Opnd1_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd0_0); + AllOpnds.push_back(&Opnd1_0); + if (Opnd0_ExpNum == 2) + AllOpnds.push_back(&Opnd0_1); + if (Opnd1_ExpNum == 2) + AllOpnds.push_back(&Opnd1_1); + + // Compute instruction quota. We should save at least one instruction. + unsigned InstQuota = 0; + + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; + + if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) + return R; + } + + if (OpndNum != 2) { + // The input instruction is : "I=0.0 +/- V". If the "V" were able to be + // splitted into two addends, say "V = X - Y", the instruction would have + // been optimized into "I = Y - X" in the previous steps. + // + const FAddendCoef &CE = Opnd0.getCoef(); + return CE.isOne() ? Opnd0.getSymVal() : 0; + } + + // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] + if (Opnd1_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd0); + AllOpnds.push_back(&Opnd1_0); + if (Opnd1_ExpNum == 2) + AllOpnds.push_back(&Opnd1_1); + + if (Value *R = simplifyFAdd(AllOpnds, 1)) + return R; + } + + // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] + if (Opnd0_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd1); + AllOpnds.push_back(&Opnd0_0); + if (Opnd0_ExpNum == 2) + AllOpnds.push_back(&Opnd0_1); + + if (Value *R = simplifyFAdd(AllOpnds, 1)) + return R; + } + + // step 6: Try factorization as the last resort, + return performFactorization(I); +} + +Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { + + unsigned AddendNum = Addends.size(); + assert(AddendNum <= 4 && "Too many addends"); + + // For saving intermediate results; + unsigned NextTmpIdx = 0; + FAddend TmpResult[3]; + + // Points to the constant addend of the resulting simplified expression. + // If the resulting expr has constant-addend, this constant-addend is + // desirable to reside at the top of the resulting expression tree. Placing + // constant close to supper-expr(s) will potentially reveal some optimization + // opportunities in super-expr(s). + // + const FAddend *ConstAdd = 0; + + // Simplified addends are placed <SimpVect>. + AddendVect SimpVect; + + // The outer loop works on one symbolic-value at a time. Suppose the input + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // The symbolic-values will be processed in this order: x, y, z. + // + for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { + + const FAddend *ThisAddend = Addends[SymIdx]; + if (!ThisAddend) { + // This addend was processed before. + continue; + } + + Value *Val = ThisAddend->getSymVal(); + unsigned StartIdx = SimpVect.size(); + SimpVect.push_back(ThisAddend); + + // The inner loop collects addends sharing same symbolic-value, and these + // addends will be later on folded into a single addend. Following above + // example, if the symbolic value "y" is being processed, the inner loop + // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will + // be later on folded into "<b1+b2, y>". + // + for (unsigned SameSymIdx = SymIdx + 1; + SameSymIdx < AddendNum; SameSymIdx++) { + const FAddend *T = Addends[SameSymIdx]; + if (T && T->getSymVal() == Val) { + // Set null such that next iteration of the outer loop will not process + // this addend again. + Addends[SameSymIdx] = 0; + SimpVect.push_back(T); + } + } + + // If multiple addends share same symbolic value, fold them together. + if (StartIdx + 1 != SimpVect.size()) { + FAddend &R = TmpResult[NextTmpIdx ++]; + R = *SimpVect[StartIdx]; + for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) + R += *SimpVect[Idx]; + + // Pop all addends being folded and push the resulting folded addend. + SimpVect.resize(StartIdx); + if (Val != 0) { + if (!R.isZero()) { + SimpVect.push_back(&R); + } + } else { + // Don't push constant addend at this time. It will be the last element + // of <SimpVect>. + ConstAdd = &R; + } + } + } + + assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) && + "out-of-bound access"); + + if (ConstAdd) + SimpVect.push_back(ConstAdd); + + Value *Result; + if (!SimpVect.empty()) + Result = createNaryFAdd(SimpVect, InstrQuota); + else { + // The addition is folded to 0.0. + Result = ConstantFP::get(Instr->getType(), 0.0); + } + + return Result; +} + +Value *FAddCombine::createNaryFAdd + (const AddendVect &Opnds, unsigned InstrQuota) { + assert(!Opnds.empty() && "Expect at least one addend"); + + // Step 1: Check if the # of instructions needed exceeds the quota. + // + unsigned InstrNeeded = calcInstrNumber(Opnds); + if (InstrNeeded > InstrQuota) + return 0; + + initCreateInstNum(); + + // step 2: Emit the N-ary addition. + // Note that at most three instructions are involved in Fadd-InstCombine: the + // addition in question, and at most two neighboring instructions. + // The resulting optimized addition should have at least one less instruction + // than the original addition expression tree. This implies that the resulting + // N-ary addition has at most two instructions, and we don't need to worry + // about tree-height when constructing the N-ary addition. + + Value *LastVal = 0; + bool LastValNeedNeg = false; + + // Iterate the addends, creating fadd/fsub using adjacent two addends. + for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); + I != E; I++) { + bool NeedNeg; + Value *V = createAddendVal(**I, NeedNeg); + if (!LastVal) { + LastVal = V; + LastValNeedNeg = NeedNeg; + continue; + } + + if (LastValNeedNeg == NeedNeg) { + LastVal = createFAdd(LastVal, V); + continue; + } + + if (LastValNeedNeg) + LastVal = createFSub(V, LastVal); + else + LastVal = createFSub(LastVal, V); + + LastValNeedNeg = false; + } + + if (LastValNeedNeg) { + LastVal = createFNeg(LastVal); + } + + #ifndef NDEBUG + assert(CreateInstrNum == InstrNeeded && + "Inconsistent in instruction numbers"); + #endif + + return LastVal; +} + +Value *FAddCombine::createFSub + (Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFSub(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFNeg(Value *V) { + Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0)); + return createFSub(Zero, V); +} + +Value *FAddCombine::createFAdd + (Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFAdd(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFMul(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +void FAddCombine::createInstPostProc(Instruction *NewInstr) { + NewInstr->setDebugLoc(Instr->getDebugLoc()); + + // Keep track of the number of instruction created. + incCreateInstNum(); + + // Propagate fast-math flags + NewInstr->setFastMathFlags(Instr->getFastMathFlags()); +} + +// Return the number of instruction needed to emit the N-ary addition. +// NOTE: Keep this function in sync with createAddendVal(). +unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { + unsigned OpndNum = Opnds.size(); + unsigned InstrNeeded = OpndNum - 1; + + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; + + // Adjust the number of instructions needed to emit the N-ary add. + for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); + I != E; I++) { + const FAddend *Opnd = *I; + if (Opnd->isConstant()) + continue; + + const FAddendCoef &CE = Opnd->getCoef(); + if (CE.isMinusOne() || CE.isMinusTwo()) + NegOpndNum++; + + // Let the addend be "c * x". If "c == +/-1", the value of the addend + // is immediately available; otherwise, it needs exactly one instruction + // to evaluate the value. + if (!CE.isMinusOne() && !CE.isOne()) + InstrNeeded++; + } + if (NegOpndNum == OpndNum) + InstrNeeded++; + return InstrNeeded; +} + +// Input Addend Value NeedNeg(output) +// ================================================================ +// Constant C C false +// <+/-1, V> V coefficient is -1 +// <2/-2, V> "fadd V, V" coefficient is -2 +// <C, V> "fmul V, C" false +// +// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. +Value *FAddCombine::createAddendVal + (const FAddend &Opnd, bool &NeedNeg) { + const FAddendCoef &Coeff = Opnd.getCoef(); + + if (Opnd.isConstant()) { + NeedNeg = false; + return Coeff.getValue(Instr->getType()); + } + + Value *OpndVal = Opnd.getSymVal(); + + if (Coeff.isMinusOne() || Coeff.isOne()) { + NeedNeg = Coeff.isMinusOne(); + return OpndVal; + } + + if (Coeff.isTwo() || Coeff.isMinusTwo()) { + NeedNeg = Coeff.isMinusTwo(); + return createFAdd(OpndVal, OpndVal); + } + + NeedNeg = false; + return createFMul(OpndVal, Coeff.getValue(Instr->getType())); +} + +/// AddOne - Add one to a ConstantInt. +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} + +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + + +// dyn_castFoldableMul - If this value is a multiply that can be folded into +// other computations (because it has a constant operand), return the +// non-constant operand of the multiply, and set CST to point to the multiplier. +// Otherwise, return null. +// +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { + if (!V->hasOneUse() || !V->getType()->isIntegerTy()) + return 0; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return 0; + + if (I->getOpcode() == Instruction::Mul) + if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) + return I->getOperand(0); + if (I->getOpcode() == Instruction::Shl) + if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { + // The multiplier is really 1 << CST. + uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + uint32_t CSTVal = CST->getLimitedValue(BitWidth); + CST = ConstantInt::get(V->getType()->getContext(), + APInt::getOneBitSet(BitWidth, CSTVal)); + return I->getOperand(0); + } + return 0; +} + + +/// WillNotOverflowSignedAdd - Return true if we can prove that: +/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) +/// This basically requires proving that the add in the original type would not +/// overflow to change the sign bit or have a carry out. +bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here are some simple + // ones. + + // Add has the property that adding any two 2's complement numbers can only + // have one carry bit which can change a sign. As such, if LHS and RHS each + // have at least two sign bits, we know that the addition of the two values + // will sign extend fine. + if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) + return true; + + + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. + + return false; +} + +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + // (A*B)+(A*C) -> A*(B+C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // X + (signbit) --> X ^ signbit + const APInt &Val = CI->getValue(); + if (Val.isSignBit()) + return BinaryOperator::CreateXor(LHS, RHS); + + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? C + 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) + if (ZI->getSrcTy()->isIntegerTy(1)) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); + + Value *XorLHS = 0; ConstantInt *XorRHS = 0; + if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); + const APInt &RHSVal = CI->getValue(); + unsigned ExtendAmt = 0; + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. + if (XorRHS->getValue() == -RHSVal) { + if (RHSVal.isPowerOf2()) + ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; + else if (XorRHS->getValue().isPowerOf2()) + ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; + } + + if (ExtendAmt) { + APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); + if (!MaskedValueIsZero(XorLHS, Mask)) + ExtendAmt = 0; + } + + if (ExtendAmt) { + Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); + Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); + return BinaryOperator::CreateAShr(NewShl, ShAmt); + } + + // If this is a xor that was canonicalized from a sub, turn it back into + // a sub and fuse this add with it. + if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { + IntegerType *IT = cast<IntegerType>(I.getType()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); + if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), + XorLHS); + } + // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, + // transform them into (X + (signbit ^ C)) + if (XorRHS->getValue().isSignBit()) + return BinaryOperator::CreateAdd(XorLHS, + ConstantExpr::getXor(XorRHS, CI)); + } + } + + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + if (I.getType()->isIntegerTy(1)) + return BinaryOperator::CreateXor(LHS, RHS); + + // X + X --> X << 1 + if (LHS == RHS) { + BinaryOperator *New = + BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); + New->setHasNoSignedWrap(I.hasNoSignedWrap()); + New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return New; + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castNegVal(LHS)) { + if (!isa<Constant>(RHS)) + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); + } + + return BinaryOperator::CreateSub(RHS, LHSV); + } + + // A + -B --> A - B + if (!isa<Constant>(RHS)) + if (Value *V = dyn_castNegVal(RHS)) + return BinaryOperator::CreateSub(LHS, V); + + + ConstantInt *C2; + if (Value *X = dyn_castFoldableMul(LHS, C2)) { + if (X == RHS) // X*C + X --> X * (C+1) + return BinaryOperator::CreateMul(RHS, AddOne(C2)); + + // X*C1 + X*C2 --> X * (C1+C2) + ConstantInt *C1; + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); + } + + // X + X*C --> X * (C+1) + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); + + // A+B --> A|B iff A and B have no bits set in common. + if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + if (LHSKnownZero != 0) { + APInt RHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + + // No bits in common -> bitwise or. + if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateOr(LHS, RHS); + } + } + + // W*X + Y*Z --> W * (X+Z) iff W == Y + { + Value *W, *X, *Y, *Z; + if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && + match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { + if (W != Y) { + if (W == Z) { + std::swap(Y, Z); + } else if (Y == X) { + std::swap(W, X); + } else if (X == Z) { + std::swap(Y, Z); + std::swap(W, X); + } + } + + if (W == Y) { + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); + return BinaryOperator::CreateMul(W, NewAdd); + } + } + } + + if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { + Value *X = 0; + if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X + return BinaryOperator::CreateSub(SubOne(CRHS), X); + + // (X & FF00) + xx00 -> (X+xx00) & FF00 + if (LHS->hasOneUse() && + match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && + CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt &AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); + } + } + + // Try to fold constant add into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } + + // add (select X 0 (sub n A)) A --> select X A n + { + SelectInst *SI = dyn_cast<SelectInst>(LHS); + Value *A = RHS; + if (!SI) { + SI = dyn_cast<SelectInst>(RHS); + A = LHS; + } + if (SI && SI->hasOneUse()) { + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); + Value *N; + + // Can we fold the add into the argument of the select? + // We check both true and false select arguments for a matching subtract. + if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the true select value. + return SelectInst::Create(SI->getCondition(), N, A); + + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the false select value. + return SelectInst::Create(SI->getCondition(), A, N); + } + } + + // Check for (add (sext x), y), see if we can merge this into an + // integer add followed by a sext. + if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { + // (add (sext x), cst) --> (sext (add x, cst')) + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { + Constant *CI = + ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSExt(CI, I.getType()) == RHSC && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new, smaller add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } + + // (add (sext x), (sext y)) --> (sext (add int x, y)) + if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of sexts), and if the + // integer add will not overflow. + if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } + } + + // Check for (x & y) + (x ^ y) + { + Value *A = 0, *B = 0; + if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && + (match(LHS, m_And(m_Specific(A), m_Specific(B))) || + match(LHS, m_And(m_Specific(B), m_Specific(A))))) + return BinaryOperator::CreateOr(A, B); + + if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && + (match(RHS, m_And(m_Specific(A), m_Specific(B))) || + match(RHS, m_And(m_Specific(B), m_Specific(A))))) + return BinaryOperator::CreateOr(A, B); + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + + if (isa<Constant>(RHS)) { + if (isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) + if (Instruction *NV = FoldOpIntoSelect(I, SI)) + return NV; + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castFNegVal(LHS)) + return BinaryOperator::CreateFSub(RHS, LHSV); + + // A + -B --> A - B + if (!isa<Constant>(RHS)) + if (Value *V = dyn_castFNegVal(RHS)) + return BinaryOperator::CreateFSub(LHS, V); + + // Check for (fadd double (sitofp x), y), see if we can merge this into an + // integer add followed by a promotion. + if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { + // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) + // ... if the constant fits in the integer value. This is useful for things + // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer + // requires a constant pool load, and generally allows the add to be better + // instcombined. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + + // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) + if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0),"addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + } + + // select C, 0, B + select C, A, 0 -> select C, A, B + { + Value *A1, *B1, *C1, *A2, *B2, *C2; + if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && + match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { + if (C1 == C2) { + Constant *Z1=0, *Z2=0; + Value *A, *B, *C=C1; + if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(A1); A = A2; + Z2 = dyn_cast<Constant>(B2); B = B1; + } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(B1); B = B2; + Z2 = dyn_cast<Constant>(A2); A = A1; + } + + if (Z1 && Z2 && + (I.hasNoSignedZeros() || + (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { + return SelectInst::Create(C, A, B); + } + } + } + } + + if (I.hasUnsafeAlgebra()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return ReplaceInstUsesWith(I, V); + } + + return Changed ? &I : 0; +} + + +/// Optimize pointer differences into the same array into a size. Consider: +/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer +/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. +/// +Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, + Type *Ty) { + assert(TD && "Must have target data info for this"); + + // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize + // this. + bool Swapped = false; + GEPOperator *GEP1 = 0, *GEP2 = 0; + + // For now we require one side to be the base pointer "A" or a constant + // GEP derived from it. + if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { + // (gep X, ...) - X + if (LHSGEP->getOperand(0) == RHS) { + GEP1 = LHSGEP; + Swapped = false; + } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { + // (gep X, ...) - (gep X, ...) + if (LHSGEP->getOperand(0)->stripPointerCasts() == + RHSGEP->getOperand(0)->stripPointerCasts()) { + GEP2 = RHSGEP; + GEP1 = LHSGEP; + Swapped = false; + } + } + } + + if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { + // X - (gep X, ...) + if (RHSGEP->getOperand(0) == LHS) { + GEP1 = RHSGEP; + Swapped = true; + } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { + // (gep X, ...) - (gep X, ...) + if (RHSGEP->getOperand(0)->stripPointerCasts() == + LHSGEP->getOperand(0)->stripPointerCasts()) { + GEP2 = LHSGEP; + GEP1 = RHSGEP; + Swapped = true; + } + } + } + + // Avoid duplicating the arithmetic if GEP2 has non-constant indices and + // multiple users. + if (GEP1 == 0 || + (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) + return 0; + + // Emit the offset of the GEP and an intptr_t. + Value *Result = EmitGEPOffset(GEP1); + + // If we had a constant expression GEP on the other side offsetting the + // pointer, subtract it from the offset we have. + if (GEP2) { + Value *Offset = EmitGEPOffset(GEP2); + Result = Builder->CreateSub(Result, Offset); + } + + // If we have p - gep(p, ...) then we have to negate the result. + if (Swapped) + Result = Builder->CreateNeg(Result, "diff.neg"); + + return Builder->CreateIntCast(Result, Ty, true); +} + + +Instruction *InstCombiner::visitSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + // (A*B)-(A*C) -> A*(B-C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + + // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. + if (Value *V = dyn_castNegVal(Op1)) { + BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); + Res->setHasNoSignedWrap(I.hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return Res; + } + + if (I.getType()->isIntegerTy(1)) + return BinaryOperator::CreateXor(Op0, Op1); + + // Replace (-1 - A) with (~A). + if (match(Op0, m_AllOnes())) + return BinaryOperator::CreateNot(Op1); + + if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { + // C - ~X == X + (1+C) + Value *X = 0; + if (match(Op1, m_Not(m_Value(X)))) + return BinaryOperator::CreateAdd(X, AddOne(C)); + + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (C->isZero()) { + Value *X; ConstantInt *CI; + if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateAShr(X, CI); + + if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateLShr(X, CI); + } + + // Try to fold constant sub into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + // C-(X+C2) --> (C-C2)-X + ConstantInt *C2; + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) + return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); + + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // Fold (sub 0, (zext bool to B)) --> (sext bool to B) + if (C->isZero() && match(Op1, m_ZExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateSExtOrBitCast(X, Op1->getType()); + + // Fold (sub 0, (sext bool to B)) --> (zext bool to B) + if (C->isZero() && match(Op1, m_SExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateZExtOrBitCast(X, Op1->getType()); + } + + + { Value *Y; + // X-(X+Y) == -Y X-(Y+X) == -Y + if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || + match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) + return BinaryOperator::CreateNeg(Y); + + // (X-Y)-X == -Y + if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) + return BinaryOperator::CreateNeg(Y); + } + + if (Op1->hasOneUse()) { + Value *X = 0, *Y = 0, *Z = 0; + Constant *C = 0; + ConstantInt *CI = 0; + + // (X - (Y - Z)) --> (X + (Z - Y)). + if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) + return BinaryOperator::CreateAdd(Op0, + Builder->CreateSub(Z, Y, Op1->getName())); + + // (X - (X & Y)) --> (X & ~Y) + // + if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || + match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) + return BinaryOperator::CreateAnd(Op0, + Builder->CreateNot(Y, Y->getName() + ".not")); + + // 0 - (X sdiv C) -> (X sdiv -C) + if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && + match(Op0, m_Zero())) + return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); + + // 0 - (X << Y) -> (-X << Y) when X is freely negatable. + if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) + if (Value *XNeg = dyn_castNegVal(X)) + return BinaryOperator::CreateShl(XNeg, Y); + + // X - X*C --> X * (1-C) + if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); + return BinaryOperator::CreateMul(Op0, CP1); + } + + // X - X<<C --> X * (1-(1<<C)) + if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *One = ConstantInt::get(I.getType(), 1); + C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); + return BinaryOperator::CreateMul(Op0, C); + } + + // X - A*-B -> X + A*B + // X - -A*B -> X + A*B + Value *A, *B; + if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || + match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) + return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + + // X - A*CI -> X + A*-CI + // X - CI*A -> X + A*-CI + if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || + match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { + Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + return BinaryOperator::CreateAdd(Op0, NewMul); + } + } + + ConstantInt *C1; + if (Value *X = dyn_castFoldableMul(Op0, C1)) { + if (X == Op1) // X*C - X --> X * (C-1) + return BinaryOperator::CreateMul(Op1, SubOne(C1)); + + ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); + } + + // Optimize pointer differences into the same array into a size. Consider: + // &A[10] - &A[0]: we should compile this to "10". + if (TD) { + Value *LHSOp, *RHSOp; + if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + + // trunc(p)-trunc(q) -> trunc(p-q) + if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + } + + return 0; +} + +Instruction *InstCombiner::visitFSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + + if (isa<Constant>(Op0)) + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (Instruction *NV = FoldOpIntoSelect(I, SI)) + return NV; + + // If this is a 'B = x-(-A)', change to B = x+A, potentially looking + // through FP extensions/truncations along the way. + if (Value *V = dyn_castFNegVal(Op1)) { + Instruction *NewI = BinaryOperator::CreateFAdd(Op0, V); + NewI->copyFastMathFlags(&I); + return NewI; + } + if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) { + if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) { + Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType()); + Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc); + NewI->copyFastMathFlags(&I); + return NewI; + } + } else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) { + if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) { + Value *NewExt = Builder->CreateFPExt(V, I.getType()); + Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt); + NewI->copyFastMathFlags(&I); + return NewI; + } + } + + if (I.hasUnsafeAlgebra()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return ReplaceInstUsesWith(I, V); + } + + return 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp new file mode 100644 index 000000000000..88bb69beef03 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -0,0 +1,2510 @@ +//===- InstCombineAndOrXor.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitAnd, visitOr, and visitXor functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" +using namespace llvm; +using namespace PatternMatch; + + +/// AddOne - Add one to a ConstantInt. +static Constant *AddOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue() + 1); +} +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + +/// isFreeToInvert - Return true if the specified value is free to invert (apply +/// ~ to). This happens in cases where the ~ can be eliminated. +static inline bool isFreeToInvert(Value *V) { + // ~(~(X)) -> X. + if (BinaryOperator::isNot(V)) + return true; + + // Constants can be considered to be not'ed values. + if (isa<ConstantInt>(V)) + return true; + + // Compares can be inverted if they have a single use. + if (CmpInst *CI = dyn_cast<CmpInst>(V)) + return CI->hasOneUse(); + + return false; +} + +static inline Value *dyn_castNotVal(Value *V) { + // If this is not(not(x)) don't return that this is a not: we want the two + // not's to be folded first. + if (BinaryOperator::isNot(V)) { + Value *Operand = BinaryOperator::getNotArgument(V); + if (!isFreeToInvert(Operand)) + return Operand; + } + + // Constants can be considered to be not'ed values... + if (ConstantInt *C = dyn_cast<ConstantInt>(V)) + return ConstantInt::get(C->getType(), ~C->getValue()); + return 0; +} + +/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp +/// predicate into a three bit mask. It also returns whether it is an ordered +/// predicate by reference. +static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { + isOrdered = false; + switch (CC) { + case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 + case FCmpInst::FCMP_UNO: return 0; // 000 + case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 + case FCmpInst::FCMP_UGT: return 1; // 001 + case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 + case FCmpInst::FCMP_UEQ: return 2; // 010 + case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 + case FCmpInst::FCMP_UGE: return 3; // 011 + case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 + case FCmpInst::FCMP_ULT: return 4; // 100 + case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 + case FCmpInst::FCMP_UNE: return 5; // 101 + case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 + case FCmpInst::FCMP_ULE: return 6; // 110 + // True -> 7 + default: + // Not expecting FCMP_FALSE and FCMP_TRUE; + llvm_unreachable("Unexpected FCmp predicate!"); + } +} + +/// getNewICmpValue - This is the complement of getICmpCode, which turns an +/// opcode and two operands into either a constant true or false, or a brand +/// new ICmp instruction. The sign is passed in to determine which kind +/// of predicate to use in the new icmp instruction. +static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + ICmpInst::Predicate NewPred; + if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) + return NewConstant; + return Builder->CreateICmp(NewPred, LHS, RHS); +} + +/// getFCmpValue - This is the complement of getFCmpCode, which turns an +/// opcode and two operands into either a FCmp instruction. isordered is passed +/// in to determine which kind of predicate to use in the new fcmp instruction. +static Value *getFCmpValue(bool isordered, unsigned code, + Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; + switch (code) { + default: llvm_unreachable("Illegal FCmp code!"); + case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; + case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; + case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; + case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break; + case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; + case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; + case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; + case 7: + if (!isordered) return ConstantInt::getTrue(LHS->getContext()); + Pred = FCmpInst::FCMP_ORD; break; + } + return Builder->CreateFCmp(Pred, LHS, RHS); +} + +// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where +// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is +// guaranteed to be a binary operator. +Instruction *InstCombiner::OptAndOp(Instruction *Op, + ConstantInt *OpRHS, + ConstantInt *AndRHS, + BinaryOperator &TheAnd) { + Value *X = Op->getOperand(0); + Constant *Together = 0; + if (!Op->isShift()) + Together = ConstantExpr::getAnd(AndRHS, OpRHS); + + switch (Op->getOpcode()) { + case Instruction::Xor: + if (Op->hasOneUse()) { + // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) + Value *And = Builder->CreateAnd(X, AndRHS); + And->takeName(Op); + return BinaryOperator::CreateXor(And, Together); + } + break; + case Instruction::Or: + if (Op->hasOneUse()){ + if (Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Value *Or = Builder->CreateOr(X, Together); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); + } + + ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together); + if (TogetherCI && !TogetherCI->isZero()){ + // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1 + // NOTE: This reduces the number of bits set in the & mask, which + // can expose opportunities for store narrowing. + Together = ConstantExpr::getXor(AndRHS, Together); + Value *And = Builder->CreateAnd(X, Together); + And->takeName(Op); + return BinaryOperator::CreateOr(And, OpRHS); + } + } + + break; + case Instruction::Add: + if (Op->hasOneUse()) { + // Adding a one to a single bit bit-field should be turned into an XOR + // of the bit. First thing to check is to see if this AND is with a + // single bit constant. + const APInt &AndRHSV = AndRHS->getValue(); + + // If there is only one bit set. + if (AndRHSV.isPowerOf2()) { + // Ok, at this point, we know that we are masking the result of the + // ADD down to exactly one bit. If the constant we are adding has + // no bits set below this bit, then we can eliminate the ADD. + const APInt& AddRHS = OpRHS->getValue(); + + // Check to see if any bits below the one bit set in AndRHSV are set. + if ((AddRHS & (AndRHSV-1)) == 0) { + // If not, the only thing that can effect the output of the AND is + // the bit specified by AndRHSV. If that bit is set, the effect of + // the XOR is to toggle the bit. If it is clear, then the ADD has + // no effect. + if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop + TheAnd.setOperand(0, X); + return &TheAnd; + } else { + // Pull the XOR out of the AND. + Value *NewAnd = Builder->CreateAnd(X, AndRHS); + NewAnd->takeName(Op); + return BinaryOperator::CreateXor(NewAnd, AndRHS); + } + } + } + } + break; + + case Instruction::Shl: { + // We know that the AND will not produce any of the bits shifted in, so if + // the anded constant includes them, clear them now! + // + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); + ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask); + + if (CI->getValue() == ShlMask) + // Masking out bits that the shift already masks. + return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. + + if (CI != AndRHS) { // Reducing bits set in and. + TheAnd.setOperand(1, CI); + return &TheAnd; + } + break; + } + case Instruction::LShr: { + // We know that the AND will not produce any of the bits shifted in, so if + // the anded constant includes them, clear them now! This only applies to + // unsigned shifts, because a signed shr may bring in set bits! + // + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask); + + if (CI->getValue() == ShrMask) + // Masking out bits that the shift already masks. + return ReplaceInstUsesWith(TheAnd, Op); + + if (CI != AndRHS) { + TheAnd.setOperand(1, CI); // Reduce bits set in and cst. + return &TheAnd; + } + break; + } + case Instruction::AShr: + // Signed shr. + // See if this is shifting in some sign extension, then masking it out + // with an and. + if (Op->hasOneUse()) { + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask); + if (C == AndRHS) { // Masking out bits shifted in. + // (Val ashr C1) & C2 -> (Val lshr C1) & C2 + // Make the argument unsigned. + Value *ShVal = Op->getOperand(0); + ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); + } + } + break; + } + return 0; +} + +/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise +/// (V < Lo || V >= Hi). In practice, we emit the more efficient +/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates +/// whether to treat the V, Lo and HI as signed or not. IB is the location to +/// insert new instructions. +Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside) { + assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? + ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && + "Lo is not <= Hi in range emission code!"); + + if (Inside) { + if (Lo == Hi) // Trivially false. + return Builder->getFalse(); + + // V >= Min && V < Hi --> V < Hi + if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); + return Builder->CreateICmp(pred, V, Hi); + } + + // Emit V-Lo <u Hi-Lo + Constant *NegLo = ConstantExpr::getNeg(Lo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); + return Builder->CreateICmpULT(Add, UpperBound); + } + + if (Lo == Hi) // Trivially true. + return Builder->getTrue(); + + // V < Min || V >= Hi -> V > Hi-1 + Hi = SubOne(cast<ConstantInt>(Hi)); + if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); + return Builder->CreateICmp(pred, V, Hi); + } + + // Emit V-Lo >u Hi-1-Lo + // Note that Hi has already had one subtracted from it, above. + ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); + return Builder->CreateICmpUGT(Add, LowerBound); +} + +// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with +// any number of 0s on either side. The 1s are allowed to wrap from LSB to +// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is +// not, since all 1s are not contiguous. +static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { + const APInt& V = Val->getValue(); + uint32_t BitWidth = Val->getType()->getBitWidth(); + if (!APIntOps::isShiftedMask(BitWidth, V)) return false; + + // look for the first zero bit after the run of ones + MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); + // look for the first non-zero bit + ME = V.getActiveBits(); + return true; +} + +/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, +/// where isSub determines whether the operator is a sub. If we can fold one of +/// the following xforms: +/// +/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask +/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// +/// return (A +/- B). +/// +Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, + ConstantInt *Mask, bool isSub, + Instruction &I) { + Instruction *LHSI = dyn_cast<Instruction>(LHS); + if (!LHSI || LHSI->getNumOperands() != 2 || + !isa<ConstantInt>(LHSI->getOperand(1))) return 0; + + ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); + + switch (LHSI->getOpcode()) { + default: return 0; + case Instruction::And: + if (ConstantExpr::getAnd(N, Mask) == Mask) { + // If the AndRHS is a power of two minus one (0+1+), this is simple. + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == + Mask->getValue().getBitWidth()) + break; + + // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ + // part, we don't need any explicit masks to take them out of A. If that + // is all N is, ignore it. + uint32_t MB = 0, ME = 0; + if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive + uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth(); + APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); + if (MaskedValueIsZero(RHS, Mask)) + break; + } + } + return 0; + case Instruction::Or: + case Instruction::Xor: + // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() + && ConstantExpr::getAnd(N, Mask)->isNullValue()) + break; + return 0; + } + + if (isSub) + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); +} + +/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C) +/// One of A and B is considered the mask, the other the value. This is +/// described as the "AMask" or "BMask" part of the enum. If the enum +/// contains only "Mask", then both A and B can be considered masks. +/// If A is the mask, then it was proven, that (A & C) == C. This +/// is trivial if C == A, or C == 0. If both A and C are constants, this +/// proof is also easy. +/// For the following explanations we assume that A is the mask. +/// The part "AllOnes" declares, that the comparison is true only +/// if (A & B) == A, or all bits of A are set in B. +/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes +/// The part "AllZeroes" declares, that the comparison is true only +/// if (A & B) == 0, or all bits of A are cleared in B. +/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes +/// The part "Mixed" declares, that (A & B) == C and C might or might not +/// contain any number of one bits and zero bits. +/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed +/// The Part "Not" means, that in above descriptions "==" should be replaced +/// by "!=". +/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes +/// If the mask A contains a single bit, then the following is equivalent: +/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0) +/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0) +enum MaskedICmpType { + FoldMskICmp_AMask_AllOnes = 1, + FoldMskICmp_AMask_NotAllOnes = 2, + FoldMskICmp_BMask_AllOnes = 4, + FoldMskICmp_BMask_NotAllOnes = 8, + FoldMskICmp_Mask_AllZeroes = 16, + FoldMskICmp_Mask_NotAllZeroes = 32, + FoldMskICmp_AMask_Mixed = 64, + FoldMskICmp_AMask_NotMixed = 128, + FoldMskICmp_BMask_Mixed = 256, + FoldMskICmp_BMask_NotMixed = 512 +}; + +/// return the set of pattern classes (from MaskedICmpType) +/// that (icmp SCC (A & B), C) satisfies +static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, + ICmpInst::Predicate SCC) +{ + ConstantInt *ACst = dyn_cast<ConstantInt>(A); + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); + bool icmp_abit = (ACst != 0 && !ACst->isZero() && + ACst->getValue().isPowerOf2()); + bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + BCst->getValue().isPowerOf2()); + unsigned result = 0; + if (CCst != 0 && CCst->isZero()) { + // if C is zero, then both A and B qualify as mask + result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_AMask_Mixed | + FoldMskICmp_BMask_Mixed) + : (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_AMask_NotMixed | + FoldMskICmp_BMask_NotMixed)); + if (icmp_abit) + result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes | + FoldMskICmp_AMask_NotMixed) + : (FoldMskICmp_AMask_AllOnes | + FoldMskICmp_AMask_Mixed)); + if (icmp_bbit) + result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_BMask_NotMixed) + : (FoldMskICmp_BMask_AllOnes | + FoldMskICmp_BMask_Mixed)); + return result; + } + if (A == C) { + result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes | + FoldMskICmp_AMask_Mixed) + : (FoldMskICmp_AMask_NotAllOnes | + FoldMskICmp_AMask_NotMixed)); + if (icmp_abit) + result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_AMask_NotMixed) + : (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_AMask_Mixed)); + } else if (ACst != 0 && CCst != 0 && + ConstantExpr::getAnd(ACst, CCst) == CCst) { + result |= (icmp_eq ? FoldMskICmp_AMask_Mixed + : FoldMskICmp_AMask_NotMixed); + } + if (B == C) { + result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes | + FoldMskICmp_BMask_Mixed) + : (FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_BMask_NotMixed)); + if (icmp_bbit) + result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_BMask_NotMixed) + : (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_BMask_Mixed)); + } else if (BCst != 0 && CCst != 0 && + ConstantExpr::getAnd(BCst, CCst) == CCst) { + result |= (icmp_eq ? FoldMskICmp_BMask_Mixed + : FoldMskICmp_BMask_NotMixed); + } + return result; +} + +/// Convert an analysis of a masked ICmp into its equivalent if all boolean +/// operations had the opposite sense. Since each "NotXXX" flag (recording !=) +/// is adjacent to the corresponding normal flag (recording ==), this just +/// involves swapping those bits over. +static unsigned conjugateICmpMask(unsigned Mask) { + unsigned NewMask; + NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes | + FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed | + FoldMskICmp_BMask_Mixed)) + << 1; + + NewMask |= + (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed | + FoldMskICmp_BMask_NotMixed)) + >> 1; + + return NewMask; +} + +/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) +/// if possible. The returned predicate is either == or !=. Returns false if +/// decomposition fails. +static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, + Value *&X, Value *&Y, Value *&Z) { + // X < 0 is equivalent to (X & SignBit) != 0. + if (I->getPredicate() == ICmpInst::ICMP_SLT) + if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (C->isZero()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_NE; + Z = C; + return true; + } + + // X > -1 is equivalent to (X & SignBit) == 0. + if (I->getPredicate() == ICmpInst::ICMP_SGT) + if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (C->isAllOnesValue()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_EQ; + Z = ConstantInt::getNullValue(C->getType()); + return true; + } + + return false; +} + +/// foldLogOpOfMaskedICmpsHelper: +/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// return the set of pattern classes (from MaskedICmpType) +/// that both LHS and RHS satisfy +static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, + Value*& B, Value*& C, + Value*& D, Value*& E, + ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate &LHSCC, + ICmpInst::Predicate &RHSCC) { + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; + // vectors are not (yet?) supported + if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; + + // Here comes the tricky part: + // LHS might be of the form L11 & L12 == X, X == L21 & L22, + // and L11 & L12 == L21 & L22. The same goes for RHS. + // Now we must find those components L** and R**, that are equal, so + // that we can extract the parameters A, B, C, D, and E for the canonical + // above. + Value *L1 = LHS->getOperand(0); + Value *L2 = LHS->getOperand(1); + Value *L11,*L12,*L21,*L22; + // Check whether the icmp can be decomposed into a bit test. + if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { + L21 = L22 = L1 = 0; + } else { + // Look for ANDs in the LHS icmp. + if (!L1->getType()->isIntegerTy()) { + // You can icmp pointers, for example. They really aren't masks. + L11 = L12 = 0; + } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { + // Any icmp can be viewed as being trivially masked; if it allows us to + // remove one, it's worth it. + L11 = L1; + L12 = Constant::getAllOnesValue(L1->getType()); + } + + if (!L2->getType()->isIntegerTy()) { + // You can icmp pointers, for example. They really aren't masks. + L21 = L22 = 0; + } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { + L21 = L2; + L22 = Constant::getAllOnesValue(L2->getType()); + } + } + + // Bail if LHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(LHSCC)) + return 0; + + Value *R1 = RHS->getOperand(0); + Value *R2 = RHS->getOperand(1); + Value *R11,*R12; + bool ok = false; + if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { + A = R12; D = R11; + } else { + return 0; + } + E = R2; R1 = 0; ok = true; + } else if (R1->getType()->isIntegerTy()) { + if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { + // As before, model no mask as a trivial mask if it'll let us do an + // optimisation. + R11 = R1; + R12 = Constant::getAllOnesValue(R1->getType()); + } + + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R2; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { + A = R12; D = R11; E = R2; ok = true; + } + } + + // Bail if RHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(RHSCC)) + return 0; + + // Look for ANDs in on the right side of the RHS icmp. + if (!ok && R2->getType()->isIntegerTy()) { + if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) { + R11 = R2; + R12 = Constant::getAllOnesValue(R2->getType()); + } + + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R1; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { + A = R12; D = R11; E = R1; ok = true; + } else { + return 0; + } + } + if (!ok) + return 0; + + if (L11 == A) { + B = L12; C = L2; + } else if (L12 == A) { + B = L11; C = L2; + } else if (L21 == A) { + B = L22; C = L1; + } else if (L22 == A) { + B = L21; C = L1; + } + + unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC); + unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC); + return left_type & right_type; +} +/// foldLogOpOfMaskedICmps: +/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// into a single (icmp(A & X) ==/!= Y) +static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, + llvm::InstCombiner::BuilderTy* Builder) { + Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, + LHSCC, RHSCC); + if (mask == 0) return 0; + assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && + "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); + + // In full generality: + // (icmp (A & B) Op C) | (icmp (A & D) Op E) + // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] + // + // If the latter can be converted into (icmp (A & X) Op Y) then the former is + // equivalent to (icmp (A & X) !Op Y). + // + // Therefore, we can pretend for the rest of this function that we're dealing + // with the conjunction, provided we flip the sense of any comparisons (both + // input and output). + + // In most cases we're going to produce an EQ for the "&&" case. + ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; + if (!IsAnd) { + // Convert the masking analysis into its equivalent with negated + // comparisons. + mask = conjugateICmpMask(mask); + } + + if (mask & FoldMskICmp_Mask_AllZeroes) { + // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) + // -> (icmp eq (A & (B|D)), 0) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + // we can't use C as zero, because we might actually handle + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // with B and D, having a single bit set + Value* zero = Constant::getNullValue(A->getType()); + return Builder->CreateICmp(NEWCC, newAnd, zero); + } + if (mask & FoldMskICmp_BMask_AllOnes) { + // (icmp eq (A & B), B) & (icmp eq (A & D), D) + // -> (icmp eq (A & (B|D)), (B|D)) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + return Builder->CreateICmp(NEWCC, newAnd, newOr); + } + if (mask & FoldMskICmp_AMask_AllOnes) { + // (icmp eq (A & B), A) & (icmp eq (A & D), A) + // -> (icmp eq (A & (B&D)), A) + Value* newAnd1 = Builder->CreateAnd(B, D); + Value* newAnd = Builder->CreateAnd(A, newAnd1); + return Builder->CreateICmp(NEWCC, newAnd, A); + } + + // Remaining cases assume at least that B and D are constant, and depend on + // their actual values. This isn't strictly, necessary, just a "handle the + // easy cases for now" decision. + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + if (BCst == 0) return 0; + ConstantInt *DCst = dyn_cast<ConstantInt>(D); + if (DCst == 0) return 0; + + if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { + // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) + // Only valid if one of the masks is a superset of the other (check "B&D" is + // the same as either B or D). + APInt NewMask = BCst->getValue() & DCst->getValue(); + + if (NewMask == BCst->getValue()) + return LHS; + else if (NewMask == DCst->getValue()) + return RHS; + } + if (mask & FoldMskICmp_AMask_NotAllOnes) { + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // -> (icmp ne (A & B), A) or (icmp ne (A & D), A) + // Only valid if one of the masks is a superset of the other (check "B|D" is + // the same as either B or D). + APInt NewMask = BCst->getValue() | DCst->getValue(); + + if (NewMask == BCst->getValue()) + return LHS; + else if (NewMask == DCst->getValue()) + return RHS; + } + if (mask & FoldMskICmp_BMask_Mixed) { + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + // We already know that B & C == C && D & E == E. + // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of + // C and E, which are shared by both the mask B and the mask D, don't + // contradict, then we can transform to + // -> (icmp eq (A & (B|D)), (C|E)) + // Currently, we only handle the case of B, C, D, and E being constant. + // we can't simply use C and E, because we might actually handle + // (icmp ne (A & B), B) & (icmp eq (A & D), D) + // with B and D, having a single bit set + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + if (CCst == 0) return 0; + if (LHSCC != NEWCC) + CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); + ConstantInt *ECst = dyn_cast<ConstantInt>(E); + if (ECst == 0) return 0; + if (RHSCC != NEWCC) + ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); + ConstantInt* MCst = dyn_cast<ConstantInt>( + ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), + ConstantExpr::getXor(CCst, ECst)) ); + // if there is a conflict we should actually return a false for the + // whole construct + if (!MCst->isZero()) + return 0; + Value *newOr1 = Builder->CreateOr(B, D); + Value *newOr2 = ConstantExpr::getOr(CCst, ECst); + Value *newAnd = Builder->CreateAnd(A, newOr1); + return Builder->CreateICmp(NEWCC, newAnd, newOr2); + } + return 0; +} + +/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. +Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + + // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) + if (PredicatesFoldable(LHSCC, RHSCC)) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); + } + } + + // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder)) + return V; + + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). + Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); + ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); + if (LHSCst == 0 || RHSCst == 0) return 0; + + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) + // where C is a power of 2 + if (LHSCC == ICmpInst::ICMP_ULT && + LHSCst->getValue().isPowerOf2()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. + if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && + LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + + // (trunc x) == C1 & (and x, CA) == C2 + // (and x, CA) == C2 & (trunc x) == C1 + if (match(Val2, m_Trunc(m_Value(V))) && + match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = RHSCst; + BigCst = LHSCst; + } else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = LHSCst; + BigCst = RHSCst; + } + + if (SmallCst && BigCst) { + unsigned BigBitSize = BigCst->getType()->getBitWidth(); + unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { + Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); + APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); + Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); + return Builder->CreateICmp(LHSCC, NewAnd, NewVal); + } + } + } + + // From here on, we only handle: + // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // Make a constant range that's the intersection of the two icmp ranges. + // If the intersection is empty, we know that the result is false. + ConstantRange LHSRange = + ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange RHSRange = + ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + + if (LHSRange.intersectWith(RHSRange).isEmptySet()) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + + // We can't fold (ugt x, C) & (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. + bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have two icmp instructions + // comparing a value against two constants and and'ing the result + // together. Because of the above check, we know that we only have + // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know + // (from the icmp folding check above), that the two constants + // are not equal and that the larger constant is on the RHS + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 + case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 + case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 + return LHS; + } + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_ULT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 + return Builder->CreateICmpULT(Val, LHSCst); + break; // (X != 13 & X u< 15) -> no change + case ICmpInst::ICMP_SLT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 + return Builder->CreateICmpSLT(Val, LHSCst); + break; // (X != 13 & X s< 15) -> no change + case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 + case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 + return RHS; + case ICmpInst::ICMP_NE: + // Special case to get the ordering right when the values wrap around + // zero. + if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue()) + std::swap(LHSCst, RHSCst); + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1), + Val->getName()+".cmp"); + } + break; // (X != 13 & X != 15) -> no change + } + break; + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 + case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 + return LHS; + case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 + case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 + return LHS; + case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 + return RHS; + case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 + return Builder->CreateICmp(LHSCC, Val, RHSCst); + break; // (X u> 13 & X != 15) -> no change + case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true); + case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 + return RHS; + case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 + return Builder->CreateICmp(LHSCC, Val, RHSCst); + break; // (X s> 13 & X != 15) -> no change + case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true); + case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change + break; + } + break; + } + + return 0; +} + +/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) { + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) + return 0; + + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return Builder->getFalse(); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp ord x,x" is "fcmp ord x, 0". + if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && + isa<ConstantAggregateZero>(RHS->getOperand(1))) + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + if (Op0CC == FCmpInst::FCMP_TRUE) + return RHS; + if (Op1CC == FCmpInst::FCMP_TRUE) + return LHS; + + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + // uno && ord -> false + if (Op0Pred == 0 && Op1Pred == 0 && Op0Ordered != Op1Ordered) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + if (Op1Pred == 0) { + std::swap(LHS, RHS); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> uno + // ord && olt -> ord && (ord && lt) -> olt + if (!Op0Ordered && (Op0Ordered == Op1Ordered)) + return LHS; + if (Op0Ordered && (Op0Ordered == Op1Ordered)) + return RHS; + + // uno && oeq -> uno && (ord && eq) -> false + if (!Op0Ordered) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + // ord && ueq -> ord && (uno || eq) -> oeq + return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder); + } + } + + return 0; +} + + +Instruction *InstCombiner::visitAnd(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyAndInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { + const APInt &AndRHSMask = AndRHS->getValue(); + + // Optimize a variety of ((val OP C1) & C2) combinations... + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { + Value *Op0LHS = Op0I->getOperand(0); + Value *Op0RHS = Op0I->getOperand(1); + switch (Op0I->getOpcode()) { + default: break; + case Instruction::Xor: + case Instruction::Or: { + // If the mask is only needed on one incoming arm, push it up. + if (!Op0I->hasOneUse()) break; + + APInt NotAndRHS(~AndRHSMask); + if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { + // Not masking anything out for the LHS, move to RHS. + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); + } + if (!isa<Constant>(Op0RHS) && + MaskedValueIsZero(Op0RHS, NotAndRHS)) { + // Not masking anything out for the RHS, move to LHS. + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); + } + + break; + } + case Instruction::Add: + // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes + break; + + case Instruction::Sub: + // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + + // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS + // has 1's for all bits that the subtraction with A might affect. + if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) { + uint32_t BitWidth = AndRHSMask.getBitWidth(); + uint32_t Zeros = AndRHSMask.countLeadingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); + + if (MaskedValueIsZero(Op0LHS, Mask)) { + Value *NewNeg = Builder->CreateNeg(Op0RHS); + return BinaryOperator::CreateAnd(NewNeg, AndRHS); + } + } + break; + + case Instruction::Shl: + case Instruction::LShr: + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + if (AndRHSMask == 1 && Op0LHS == AndRHS) { + Value *NewICmp = + Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + return new ZExtInst(NewICmp, I.getType()); + } + break; + } + + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) + if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) + return Res; + } + + // If this is an integer truncation, and if the source is an 'and' with + // immediate, transform it. This frequently occurs for bitfield accesses. + { + Value *X = 0; ConstantInt *YC = 0; + if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { + // Change: and (trunc (and X, YC) to T), C2 + // into : and (trunc X to T), trunc(YC) & C2 + // This will fold the two constants together, which may allow + // other simplifications. + Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); + } + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + + // (~A & ~B) == (~(A | B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(Or); + } + + { + Value *A = 0, *B = 0, *C = 0, *D = 0; + // (A|B) & ~(A&B) -> A^B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + // ~(A&B) & (A|B) -> A^B + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + // A&(A^B) => A & ~B + { + Value *tmpOp0 = Op0; + Value *tmpOp1 = Op1; + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1 ) { + tmpOp1 = Op0; + tmpOp0 = Op1; + // Simplify below + } + } + + if (tmpOp1->hasOneUse() && + match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == tmpOp0) { + std::swap(A, B); + } + // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if + // A is originally -1 (or a vector of -1 and undefs), then we enter + // an endless loop. By checking that A is non-constant we ensure that + // we will never get to the loop. + if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); + } + } + + // (A&((~A)|B)) -> A&B + if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || + match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + return BinaryOperator::CreateAnd(A, Op1); + if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || + match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + return BinaryOperator::CreateAnd(A, Op0); + } + + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) + if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { + Type *SrcTy = Op0C->getOperand(0)->getType(); + if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? + SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + // Only do this if the casts both really cause code to be generated. + if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + } + + // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { + if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } + } + + { + Value *X = 0; + bool OpsSwapped = false; + // Canonicalize SExt or Not to the LHS + if (match(Op1, m_SExt(m_Value())) || + match(Op1, m_Not(m_Value()))) { + std::swap(Op0, Op1); + OpsSwapped = true; + } + + // Fold (and (sext bool to A), B) --> (select bool, B, 0) + if (match(Op0, m_SExt(m_Value(X))) && + X->getType()->getScalarType()->isIntegerTy(1)) { + Value *Zero = Constant::getNullValue(Op1->getType()); + return SelectInst::Create(X, Op1, Zero); + } + + // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) + if (match(Op0, m_Not(m_SExt(m_Value(X)))) && + X->getType()->getScalarType()->isIntegerTy(1)) { + Value *Zero = Constant::getNullValue(Op0->getType()); + return SelectInst::Create(X, Zero, Op1); + } + + if (OpsSwapped) + std::swap(Op0, Op1); + } + + return Changed ? &I : 0; +} + +/// CollectBSwapParts - Analyze the specified subexpression and see if it is +/// capable of providing pieces of a bswap. The subexpression provides pieces +/// of a bswap if it is proven that each of the non-zero bytes in the output of +/// the expression came from the corresponding "byte swapped" byte in some other +/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then +/// we know that the expression deposits the low byte of %X into the high byte +/// of the bswap result and that all other bytes are zero. This expression is +/// accepted, the high byte of ByteValues is set to X to indicate a correct +/// match. +/// +/// This function returns true if the match was unsuccessful and false if so. +/// On entry to the function the "OverallLeftShift" is a signed integer value +/// indicating the number of bytes that the subexpression is later shifted. For +/// example, if the expression is later right shifted by 16 bits, the +/// OverallLeftShift value would be -2 on entry. This is used to specify which +/// byte of ByteValues is actually being set. +/// +/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding +/// byte is masked to zero by a user. For example, in (X & 255), X will be +/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits +/// this function to working on up to 32-byte (256 bit) values. ByteMask is +/// always in the local (OverallLeftShift) coordinate space. +/// +static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, + SmallVectorImpl<Value *> &ByteValues) { + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If this is an or instruction, it may be an inner node of the bswap. + if (I->getOpcode() == Instruction::Or) { + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues) || + CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, + ByteValues); + } + + // If this is a logical shift by a constant multiple of 8, recurse with + // OverallLeftShift and ByteMask adjusted. + if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { + unsigned ShAmt = + cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); + // Ensure the shift amount is defined and of a byte value. + if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) + return true; + + unsigned ByteShift = ShAmt >> 3; + if (I->getOpcode() == Instruction::Shl) { + // X << 2 -> collect(X, +2) + OverallLeftShift += ByteShift; + ByteMask >>= ByteShift; + } else { + // X >>u 2 -> collect(X, -2) + OverallLeftShift -= ByteShift; + ByteMask <<= ByteShift; + ByteMask &= (~0U >> (32-ByteValues.size())); + } + + if (OverallLeftShift >= (int)ByteValues.size()) return true; + if (OverallLeftShift <= -(int)ByteValues.size()) return true; + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); + } + + // If this is a logical 'and' with a mask that clears bytes, clear the + // corresponding bytes in ByteMask. + if (I->getOpcode() == Instruction::And && + isa<ConstantInt>(I->getOperand(1))) { + // Scan every byte of the and mask, seeing if the byte is either 0 or 255. + unsigned NumBytes = ByteValues.size(); + APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); + const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + + for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { + // If this byte is masked out by a later operation, we don't care what + // the and mask is. + if ((ByteMask & (1 << i)) == 0) + continue; + + // If the AndMask is all zeros for this byte, clear the bit. + APInt MaskB = AndMask & Byte; + if (MaskB == 0) { + ByteMask &= ~(1U << i); + continue; + } + + // If the AndMask is not all ones for this byte, it's not a bytezap. + if (MaskB != Byte) + return true; + + // Otherwise, this byte is kept. + } + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap. Some observations: 1) if more than one byte + // is demanded from this input, then it could not be successfully assembled + // into a byteswap. At least one of the two bytes would not be aligned with + // their ultimate destination. + if (!isPowerOf2_32(ByteMask)) return true; + unsigned InputByteNo = countTrailingZeros(ByteMask); + + // 2) The input and ultimate destinations must line up: if byte 3 of an i32 + // is demanded, it needs to go into byte 0 of the result. This means that the + // byte needs to be shifted until it lands in the right byte bucket. The + // shift amount depends on the position: if the byte is coming from the high + // part of the value (e.g. byte 3) then it must be shifted right. If from the + // low part, it must be shifted left. + unsigned DestByteNo = InputByteNo + OverallLeftShift; + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + + // If the destination byte value is already defined, the values are or'd + // together, which isn't a bswap (unless it's an or of the same bits). + if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + return true; + ByteValues[DestByteNo] = V; + return false; +} + +/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. +/// If so, insert the new bswap intrinsic and return it. +Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { + IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); + if (!ITy || ITy->getBitWidth() % 16 || + // ByteMask only allows up to 32-byte values. + ITy->getBitWidth() > 32*8) + return 0; // Can only bswap pairs of bytes. Can't do vectors. + + /// ByteValues - For each byte of the result, we keep track of which value + /// defines each byte. + SmallVector<Value*, 8> ByteValues; + ByteValues.resize(ITy->getBitWidth()/8); + + // Try to find all the pieces corresponding to the bswap. + uint32_t ByteMask = ~0U >> (32-ByteValues.size()); + if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) + return 0; + + // Check to see if all of the bytes come from the same value. + Value *V = ByteValues[0]; + if (V == 0) return 0; // Didn't find a byte? Must be zero. + + // Check to make sure that all of the bytes come from the same value. + for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) + if (ByteValues[i] != V) + return 0; + Module *M = I.getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); + return CallInst::Create(F, V); +} + +/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check +/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then +/// we can simplify this expression to "cond ? C : D or B". +static Instruction *MatchSelectFromAndOr(Value *A, Value *B, + Value *C, Value *D) { + // If A is not a select of -1/0, this cannot match. + Value *Cond = 0; + if (!match(A, m_SExt(m_Value(Cond))) || + !Cond->getType()->isIntegerTy(1)) + return 0; + + // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. + if (match(D, m_Not(m_SExt(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, B); + if (match(D, m_SExt(m_Not(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, B); + + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. + if (match(B, m_Not(m_SExt(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, D); + if (match(B, m_SExt(m_Not(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, D); + return 0; +} + +/// IsOneHotValue - Returns true for "one-hot" values (values where at most +/// one bit can be set). +static bool IsOneHotValue(Value *V) { + // Match 1<<K. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) + if (BO->getOpcode() == Instruction::Shl) { + ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0)); + return One && One->isOne(); + } + + // Check for power of two integer constants. + if (ConstantInt *K = dyn_cast<ConstantInt>(V)) + return K->getValue().isPowerOf2(); + + return false; +} + +/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. +Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + + // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) + // if K1 and K2 are a one-bit mask. + ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); + + if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() && + RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) { + + BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0)); + BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0)); + if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && + LAnd->getOpcode() == Instruction::And && + RAnd->getOpcode() == Instruction::And) { + + Value *Mask = 0; + Value *Masked = 0; + if (LAnd->getOperand(0) == RAnd->getOperand(0) && + IsOneHotValue(LAnd->getOperand(1)) && + IsOneHotValue(RAnd->getOperand(1))) { + Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); + Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); + } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && + IsOneHotValue(LAnd->getOperand(0)) && + IsOneHotValue(RAnd->getOperand(0))) { + Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); + Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); + } + + if (Masked) + return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); + } + } + + // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) + if (PredicatesFoldable(LHSCC, RHSCC)) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); + } + } + + // handle (roughly): + // (icmp ne (A & B), C) | (icmp ne (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder)) + return V; + + Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); + if (LHS->hasOneUse() || RHS->hasOneUse()) { + // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) + // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) + Value *A = 0, *B = 0; + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) { + B = Val; + if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1)) + A = Val2; + else if (RHSCC == ICmpInst::ICMP_UGT && Val == Val2) + A = RHS->getOperand(1); + } + // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1) + // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1) + else if (RHSCC == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) { + B = Val2; + if (LHSCC == ICmpInst::ICMP_ULT && Val2 == LHS->getOperand(1)) + A = Val; + else if (LHSCC == ICmpInst::ICMP_UGT && Val2 == Val) + A = LHS->getOperand(1); + } + if (A && B) + return Builder->CreateICmp( + ICmpInst::ICMP_UGE, + Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); + } + + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). + if (LHSCst == 0 || RHSCst == 0) return 0; + + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) + // iff C2 + CA == C1. + if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) { + ConstantInt *AddCst; + if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst)))) + if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue()) + return Builder->CreateICmpULE(Val, LHSCst); + } + + // From here on, we only handle: + // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) | (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. + bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have two icmp instructions + // comparing a value against two constants and or'ing the result + // together. Because of the above check, we know that we only have + // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the + // icmp folding check above), that the two constants are not + // equal. + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + if (LHS->getOperand(0) == RHS->getOperand(0)) { + // if LHSCst and RHSCst differ only by one bit: + // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 + assert(LHSCst->getValue().ule(LHSCst->getValue())); + + APInt Xor = LHSCst->getValue() ^ RHSCst->getValue(); + if (Xor.isPowerOf2()) { + Value *NegCst = Builder->getInt(~Xor); + Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst); + return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst); + } + } + + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 <u 2 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return Builder->CreateICmpULT(Add, AddCST); + } + + break; // (X == 13 | X == 15) -> no change + case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change + case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change + break; + case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 + case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 + return RHS; + } + break; + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 + case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 + case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 + return LHS; + case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true + case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true + return Builder->getTrue(); + } + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change + break; + case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(false)) + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false); + case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 + return RHS; + case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change + break; + case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(true)) + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false); + case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 + return RHS; + case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 + case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 + return LHS; + case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true + return Builder->getTrue(); + case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 + case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 + return LHS; + case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true + case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true + return Builder->getTrue(); + case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change + break; + } + break; + } + return 0; +} + +/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { + if (LHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { + if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // true. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return Builder->getTrue(); + + // Otherwise, no need to compare the two constants, compare the + // rest. + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp uno x,x" is "fcmp uno x, 0". + if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && + isa<ConstantAggregateZero>(RHS->getOperand(1))) + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); + if (Op0CC == FCmpInst::FCMP_FALSE) + return RHS; + if (Op1CC == FCmpInst::FCMP_FALSE) + return LHS; + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op0Ordered == Op1Ordered) { + // If both are ordered or unordered, return a new fcmp with + // or'ed predicates. + return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); + } + } + return 0; +} + +/// FoldOrWithConstants - This helper function folds: +/// +/// ((A | B) & C1) | (B & C2) +/// +/// into: +/// +/// (A & C1) | B +/// +/// when the XOR of the two constants is "all ones" (-1). +Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C) { + ConstantInt *CI1 = dyn_cast<ConstantInt>(C); + if (!CI1) return 0; + + Value *V1 = 0; + ConstantInt *CI2 = 0; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + + APInt Xor = CI1->getValue() ^ CI2->getValue(); + if (!Xor.isAllOnesValue()) return 0; + + if (V1 == A || V1 == B) { + Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); + return BinaryOperator::CreateOr(NewOp, V1); + } + + return 0; +} + +Instruction *InstCombiner::visitOr(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyOrInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + ConstantInt *C1 = 0; Value *X = 0; + // (X & C1) | C2 --> (X | C2) & (C1|C2) + // iff (C1 & C2) == 0. + if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + (RHS->getValue() & C1->getValue()) != 0 && + Op0->hasOneUse()) { + Value *Or = Builder->CreateOr(X, RHS); + Or->takeName(Op0); + return BinaryOperator::CreateAnd(Or, + Builder->getInt(RHS->getValue() | C1->getValue())); + } + + // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) + if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && + Op0->hasOneUse()) { + Value *Or = Builder->CreateOr(X, RHS); + Or->takeName(Op0); + return BinaryOperator::CreateXor(Or, + Builder->getInt(C1->getValue() & ~RHS->getValue())); + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + Value *A = 0, *B = 0; + ConstantInt *C1 = 0, *C2 = 0; + + // (A | B) | C and A | (B | C) -> bswap if possible. + // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. + if (match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())) || + (match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value())))) { + if (Instruction *BSwap = MatchBSwap(I)) + return BSwap; + } + + // (X^C)|Y -> (X|Y)^C iff Y&C == 0 + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op1, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op1); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // Y|(X^C) -> (X|Y)^C iff Y&C == 0 + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op0, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op0); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // (A & C)|(B & D) + Value *C = 0, *D = 0; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + Value *V1 = 0, *V2 = 0; + C1 = dyn_cast<ConstantInt>(C); + C2 = dyn_cast<ConstantInt>(D); + if (C1 && C2) { // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + if (C1->getValue() == ~C2->getValue()) { + if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return ReplaceInstUsesWith(I, A); + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return ReplaceInstUsesWith(I, A); + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue()+1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return ReplaceInstUsesWith(I, B); + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return ReplaceInstUsesWith(I, B); + } + } + + if ((C1->getValue() & C2->getValue()) == 0) { + // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) + // iff (C1&C2) == 0 and (N&~C1) == 0 + if (match(A, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) + (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(A, + Builder->getInt(C1->getValue()|C2->getValue())); + // Or commutes, try both ways. + if (match(B, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) + (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(B, + Builder->getInt(C1->getValue()|C2->getValue())); + + // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) + // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. + ConstantInt *C3 = 0, *C4 = 0; + if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && + (C3->getValue() & ~C1->getValue()) == 0 && + match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && + (C4->getValue() & ~C2->getValue()) == 0) { + V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + return BinaryOperator::CreateAnd(V2, + Builder->getInt(C1->getValue()|C2->getValue())); + } + } + } + + // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. + // Don't do this for vector select idioms, the code generator doesn't handle + // them well yet. + if (!I.getType()->isVectorTy()) { + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + return Match; + } + + // ((A&~B)|(~A&B)) -> A^B + if ((match(C, m_Not(m_Specific(D))) && + match(B, m_Not(m_Specific(A))))) + return BinaryOperator::CreateXor(A, D); + // ((~B&A)|(~A&B)) -> A^B + if ((match(A, m_Not(m_Specific(D))) && + match(B, m_Not(m_Specific(C))))) + return BinaryOperator::CreateXor(C, D); + // ((A&~B)|(B&~A)) -> A^B + if ((match(C, m_Not(m_Specific(B))) && + match(D, m_Not(m_Specific(A))))) + return BinaryOperator::CreateXor(A, B); + // ((~B&A)|(B&~A)) -> A^B + if ((match(A, m_Not(m_Specific(B))) && + match(D, m_Not(m_Specific(C))))) + return BinaryOperator::CreateXor(C, B); + + // ((A|B)&1)|(B&-2) -> (A&1) | B + if (match(A, m_Or(m_Value(V1), m_Specific(B))) || + match(A, m_Or(m_Specific(B), m_Value(V1)))) { + Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); + if (Ret) return Ret; + } + // (B&-2)|((A|B)&1) -> (A&1) | B + if (match(B, m_Or(m_Specific(A), m_Value(V1))) || + match(B, m_Or(m_Value(V1), m_Specific(A)))) { + Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); + if (Ret) return Ret; + } + } + + // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { + if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } + } + + // (~A | ~B) == (~(A & B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(And); + } + + // Canonicalize xor to the RHS. + bool SwappedForXor = false; + if (match(Op0, m_Xor(m_Value(), m_Value()))) { + std::swap(Op0, Op1); + SwappedForXor = true; + } + + // A | ( A ^ B) -> A | B + // A | (~A ^ B) -> A | ~B + // (A & B) | (A ^ B) + if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (Op0 == A || Op0 == B) + return BinaryOperator::CreateOr(A, B); + + if (match(Op0, m_And(m_Specific(A), m_Specific(B))) || + match(Op0, m_And(m_Specific(B), m_Specific(A)))) + return BinaryOperator::CreateOr(A, B); + + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(B, B->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(A, A->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + + // A | ~(A | B) -> A | ~B + // A | ~(A ^ B) -> A | ~B + if (match(Op1, m_Not(m_Value(A)))) + if (BinaryOperator *B = dyn_cast<BinaryOperator>(A)) + if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) && + Op1->hasOneUse() && (B->getOpcode() == Instruction::Or || + B->getOpcode() == Instruction::Xor)) { + Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : + B->getOperand(0); + Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + + if (SwappedForXor) + std::swap(Op0, Op1); + + if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // fold (or (cast A), (cast B)) -> (cast (or A, B)) + if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { + CastInst *Op1C = dyn_cast<CastInst>(Op1); + if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? + Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && + // Only do this if the casts both really cause code to be + // generated. + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + } + } + + // or(sext(A), B) -> A ? -1 : B where A is an i1 + // or(A, sext(B)) -> B ? -1 : A where B is an i1 + if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + + // Note: If we've gotten to the point of visiting the outer OR, then the + // inner one couldn't be simplified. If it was a constant, then it won't + // be simplified by a later pass either, so we try swapping the inner/outer + // ORs in the hopes that we'll be able to simplify it this way. + // (X|C) | V --> (X|V) | C + if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) && + match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { + Value *Inner = Builder->CreateOr(A, Op1); + Inner->takeName(Op0); + return BinaryOperator::CreateOr(Inner, C1); + } + + // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) + // Since this OR statement hasn't been optimized further yet, we hope + // that this transformation will allow the new ORs to be optimized. + { + Value *X = 0, *Y = 0; + if (Op0->hasOneUse() && Op1->hasOneUse() && + match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && + match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { + Value *orTrue = Builder->CreateOr(A, C); + Value *orFalse = Builder->CreateOr(B, D); + return SelectInst::Create(X, orTrue, orFalse); + } + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitXor(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyXorInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // (A&B)^(A&C) -> A&(B^C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // Is this a ~ operation? + if (Value *NotOp = dyn_castNotVal(&I)) { + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { + if (Op0I->getOpcode() == Instruction::And || + Op0I->getOpcode() == Instruction::Or) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (dyn_castNotVal(Op0I->getOperand(1))) + Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); + } + + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law + // ~(X | Y) === (~X & ~Y) - De Morgan's Law + if (isFreeToInvert(Op0I->getOperand(0)) && + isFreeToInvert(Op0I->getOperand(1))) { + Value *NotX = + Builder->CreateNot(Op0I->getOperand(0), "notlhs"); + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), "notrhs"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); + } + + } else if (Op0I->getOpcode() == Instruction::AShr) { + // ~(~X >>s Y) --> (X >>s Y) + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) + return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1)); + } + } + } + + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + if (RHS->isOne() && Op0->hasOneUse()) + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + if (CmpInst *CI = dyn_cast<CmpInst>(Op0)) + return CmpInst::Create(CI->getOpcode(), + CI->getInversePredicate(), + CI->getOperand(0), CI->getOperand(1)); + + // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). + if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { + if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { + if (CI->hasOneUse() && Op0C->hasOneUse()) { + Instruction::CastOps Opcode = Op0C->getOpcode(); + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && + (RHS == ConstantExpr::getCast(Opcode, Builder->getTrue(), + Op0C->getDestTy()))) { + CI->setPredicate(CI->getInversePredicate()); + return CastInst::Create(Opcode, CI, Op0C->getType()); + } + } + } + } + + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { + // ~(c-X) == X-c-1 == X+(-c-1) + if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) + if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { + Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); + Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, + ConstantInt::get(I.getType(), 1)); + return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); + } + + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { + if (Op0I->getOpcode() == Instruction::Add) { + // ~(X-c) --> (-c-1)-X + if (RHS->isAllOnesValue()) { + Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); + return BinaryOperator::CreateSub( + ConstantExpr::getSub(NegOp0CI, + ConstantInt::get(I.getType(), 1)), + Op0I->getOperand(0)); + } else if (RHS->getValue().isSignBit()) { + // (X + C) ^ signbit -> (X + C + signbit) + Constant *C = Builder->getInt(RHS->getValue() + Op0CI->getValue()); + return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); + + } + } else if (Op0I->getOpcode() == Instruction::Or) { + // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 + if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { + Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); + // Anything in both C1 and C2 is known to be zero, remove it from + // NewRHS. + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); + NewRHS = ConstantExpr::getAnd(NewRHS, + ConstantExpr::getNot(CommonBits)); + Worklist.Add(Op0I); + I.setOperand(0, Op0I->getOperand(0)); + I.setOperand(1, NewRHS); + return &I; + } + } else if (Op0I->getOpcode() == Instruction::LShr) { + // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) + // E1 = "X ^ C1" + BinaryOperator *E1; + ConstantInt *C1; + if (Op0I->hasOneUse() && + (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) && + E1->getOpcode() == Instruction::Xor && + (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) { + // fold (C1 >> C2) ^ C3 + ConstantInt *C2 = Op0CI, *C3 = RHS; + APInt FoldConst = C1->getValue().lshr(C2->getValue()); + FoldConst ^= C3->getValue(); + // Prepare the two operands. + Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); + Opnd0->takeName(Op0I); + cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc()); + Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); + + return BinaryOperator::CreateXor(Opnd0, FoldVal); + } + } + } + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); + if (Op1I) { + Value *A, *B; + if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { + if (A == Op0) { // B^(B|A) == (A|B)^B + Op1I->swapOperands(); + I.swapOperands(); + std::swap(Op0, Op1); + } else if (B == Op0) { // B^(A|B) == (A|B)^B + I.swapOperands(); // Simplified below. + std::swap(Op0, Op1); + } + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && + Op1I->hasOneUse()){ + if (A == Op0) { // A^(A&B) -> A^(B&A) + Op1I->swapOperands(); + std::swap(A, B); + } + if (B == Op0) { // A^(B&A) -> (B&A)^A + I.swapOperands(); // Simplified below. + std::swap(Op0, Op1); + } + } + } + + BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); + if (Op0I) { + Value *A, *B; + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()) { + if (A == Op1) // (B|A)^B == (A|B)^B + std::swap(A, B); + if (B == Op1) // (A|B)^B == A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); + } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()){ + if (A == Op1) // (A&B)^A -> (B&A)^A + std::swap(A, B); + if (B == Op1 && // (B&A)^A == ~B & A + !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C + return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); + } + } + } + + // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. + if (Op0I && Op1I && Op0I->isShift() && + Op0I->getOpcode() == Op1I->getOpcode() && + Op0I->getOperand(1) == Op1I->getOperand(1) && + (Op0I->hasOneUse() || Op1I->hasOneUse())) { + Value *NewOp = + Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), + Op0I->getName()); + return BinaryOperator::Create(Op1I->getOpcode(), NewOp, + Op1I->getOperand(1)); + } + + if (Op0I && Op1I) { + Value *A, *B, *C, *D; + // (A & B)^(A | B) -> A ^ B + if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + match(Op1I, m_Or(m_Value(C), m_Value(D)))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::CreateXor(A, B); + } + // (A | B)^(A & B) -> A ^ B + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + match(Op1I, m_And(m_Value(C), m_Value(D)))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::CreateXor(A, B); + } + } + + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) + if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) + if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + return ReplaceInstUsesWith(I, + getNewICmpValue(isSigned, Code, Op0, Op1, + Builder)); + } + } + + // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) + if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? + Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && + // Only do this if the casts both really cause code to be generated. + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + } + + return Changed ? &I : 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp new file mode 100644 index 000000000000..0cd7b149d42a --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -0,0 +1,1390 @@ +//===- InstCombineCalls.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitCall and visitInvoke functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace PatternMatch; + +STATISTIC(NumSimplified, "Number of library calls simplified"); + +/// getPromotedType - Return the specified type promoted as it would be to pass +/// though a va_arg area. +static Type *getPromotedType(Type *Ty) { + if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { + if (ITy->getBitWidth() < 32) + return Type::getInt32Ty(Ty->getContext()); + } + return Ty; +} + +/// reduceToSingleValueType - Given an aggregate type which ultimately holds a +/// single scalar element, like {{{type}}} or [1 x type], return type. +static Type *reduceToSingleValueType(Type *T) { + while (!T->isSingleValueType()) { + if (StructType *STy = dyn_cast<StructType>(T)) { + if (STy->getNumElements() == 1) + T = STy->getElementType(0); + else + break; + } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) { + if (ATy->getNumElements() == 1) + T = ATy->getElementType(); + else + break; + } else + break; + } + + return T; +} + +Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD); + unsigned MinAlign = std::min(DstAlign, SrcAlign); + unsigned CopyAlign = MI->getAlignment(); + + if (CopyAlign < MinAlign) { + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MinAlign, false)); + return MI; + } + + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with + // load/store. + ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); + if (MemOpLength == 0) return 0; + + // Source and destination pointer types are always "i8*" for intrinsic. See + // if the size is something we can handle with a single primitive load/store. + // A single load+store correctly handles overlapping memory in the memmove + // case. + uint64_t Size = MemOpLength->getLimitedValue(); + assert(Size && "0-sized memory transfering should be removed already."); + + if (Size > 8 || (Size&(Size-1))) + return 0; // If not 1/2/4/8 bytes, exit. + + // Use an integer load+store unless we can find something better. + unsigned SrcAddrSp = + cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace(); + unsigned DstAddrSp = + cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); + + IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); + Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); + + // Memcpy forces the use of i8* for the source and destination. That means + // that if you're using memcpy to move one double around, you'll get a cast + // from double* to i8*. We'd much rather use a double load+store rather than + // an i64 load+store, here because this improves the odds that the source or + // dest address will be promotable. See if we can find a better type than the + // integer datatype. + Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); + MDNode *CopyMD = 0; + if (StrippedDest != MI->getArgOperand(0)) { + Type *SrcETy = cast<PointerType>(StrippedDest->getType()) + ->getElementType(); + if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { + // The SrcETy might be something like {{{double}}} or [1 x double]. Rip + // down through these levels if so. + SrcETy = reduceToSingleValueType(SrcETy); + + if (SrcETy->isSingleValueType()) { + NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp); + NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp); + + // If the memcpy has metadata describing the members, see if we can + // get the TBAA tag describing our copy. + if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { + if (M->getNumOperands() == 3 && + M->getOperand(0) && + isa<ConstantInt>(M->getOperand(0)) && + cast<ConstantInt>(M->getOperand(0))->isNullValue() && + M->getOperand(1) && + isa<ConstantInt>(M->getOperand(1)) && + cast<ConstantInt>(M->getOperand(1))->getValue() == Size && + M->getOperand(2) && + isa<MDNode>(M->getOperand(2))) + CopyMD = cast<MDNode>(M->getOperand(2)); + } + } + } + } + + // If the memcpy/memmove provides better alignment info than we can + // infer, use it. + SrcAlign = std::max(SrcAlign, CopyAlign); + DstAlign = std::max(DstAlign, CopyAlign); + + Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); + LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + L->setAlignment(SrcAlign); + if (CopyMD) + L->setMetadata(LLVMContext::MD_tbaa, CopyMD); + StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + S->setAlignment(DstAlign); + if (CopyMD) + S->setMetadata(LLVMContext::MD_tbaa, CopyMD); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); + return MI; +} + +Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { + unsigned Alignment = getKnownAlignment(MI->getDest(), TD); + if (MI->getAlignment() < Alignment) { + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + Alignment, false)); + return MI; + } + + // Extract the length and alignment and fill if they are constant. + ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); + ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); + if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) + return 0; + uint64_t Len = LenC->getLimitedValue(); + Alignment = MI->getAlignment(); + assert(Len && "0-sized memory setting should be removed already."); + + // memset(s,c,n) -> store s, c (for n=1,2,4,8) + if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { + Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + + Value *Dest = MI->getDest(); + unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); + Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); + Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); + + // Alignment 0 is identity for alignment 1 for memset, but not store. + if (Alignment == 0) Alignment = 1; + + // Extract the fill value and store. + uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; + StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); + S->setAlignment(Alignment); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(LenC->getType())); + return MI; + } + + return 0; +} + +/// visitCallInst - CallInst simplification. This mostly only handles folding +/// of intrinsic instructions. For normal calls, it allows visitCallSite to do +/// the heavy lifting. +/// +Instruction *InstCombiner::visitCallInst(CallInst &CI) { + if (isFreeCall(&CI, TLI)) + return visitFree(CI); + + // If the caller function is nounwind, mark the call as nounwind, even if the + // callee isn't. + if (CI.getParent()->getParent()->doesNotThrow() && + !CI.doesNotThrow()) { + CI.setDoesNotThrow(); + return &CI; + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); + if (!II) return visitCallSite(&CI); + + // Intrinsics cannot occur in an invoke, so handle them here instead of in + // visitCallSite. + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { + bool Changed = false; + + // memmove/cpy/set of zero bytes is a noop. + if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { + if (NumBytes->isNullValue()) + return EraseInstFromFunction(CI); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) + if (CI->getZExtValue() == 1) { + // Replace the instruction with just byte operations. We would + // transform other cases to loads/stores, but we don't know if + // alignment is sufficient. + } + } + + // No other transformations apply to volatile transfers. + if (MI->isVolatile()) + return 0; + + // If we have a memmove and the source operation is a constant global, + // then the source and dest pointers can't alias, so we can change this + // into a call to memcpy. + if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { + if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) + if (GVSrc->isConstant()) { + Module *M = CI.getParent()->getParent()->getParent(); + Intrinsic::ID MemCpyID = Intrinsic::memcpy; + Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys)); + Changed = true; + } + } + + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { + // memmove(x,x,size) -> noop. + if (MTI->getSource() == MTI->getDest()) + return EraseInstFromFunction(CI); + } + + // If we can determine a pointer alignment that is bigger than currently + // set, update the alignment. + if (isa<MemTransferInst>(MI)) { + if (Instruction *I = SimplifyMemTransfer(MI)) + return I; + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { + if (Instruction *I = SimplifyMemSet(MSI)) + return I; + } + + if (Changed) return II; + } + + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::objectsize: { + uint64_t Size; + if (getObjectSize(II->getArgOperand(0), Size, TD, TLI)) + return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); + return 0; + } + case Intrinsic::bswap: { + Value *IIOperand = II->getArgOperand(0); + Value *X = 0; + + // bswap(bswap(x)) -> x + if (match(IIOperand, m_BSwap(m_Value(X)))) + return ReplaceInstUsesWith(CI, X); + + // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) + if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { + unsigned C = X->getType()->getPrimitiveSizeInBits() - + IIOperand->getType()->getPrimitiveSizeInBits(); + Value *CV = ConstantInt::get(X->getType(), C); + Value *V = Builder->CreateLShr(X, CV); + return new TruncInst(V, IIOperand->getType()); + } + break; + } + + case Intrinsic::powi: + if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); + // powi(x, 1) -> x + if (Power->isOne()) + return ReplaceInstUsesWith(CI, II->getArgOperand(0)); + // powi(x, -1) -> 1/x + if (Power->isAllOnesValue()) + return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), + II->getArgOperand(0)); + } + break; + case Intrinsic::cttz: { + // If all bits below the first known one are known zero, + // this value is constant. + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. + if (!IT) break; + uint32_t BitWidth = IT->getBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + unsigned TrailingZeros = KnownOne.countTrailingZeros(); + APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); + if ((Mask & KnownZero) == Mask) + return ReplaceInstUsesWith(CI, ConstantInt::get(IT, + APInt(BitWidth, TrailingZeros))); + + } + break; + case Intrinsic::ctlz: { + // If all bits above the first known one are known zero, + // this value is constant. + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. + if (!IT) break; + uint32_t BitWidth = IT->getBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + unsigned LeadingZeros = KnownOne.countLeadingZeros(); + APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); + if ((Mask & KnownZero) == Mask) + return ReplaceInstUsesWith(CI, ConstantInt::get(IT, + APInt(BitWidth, LeadingZeros))); + + } + break; + case Intrinsic::uadd_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; + bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; + + if (LHSKnownNegative || LHSKnownPositive) { + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; + bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; + if (LHSKnownNegative && RHSKnownNegative) { + // The sign bit is set in both cases: this MUST overflow. + // Create a simple add instruction, and insert it into the struct. + Value *Add = Builder->CreateAdd(LHS, RHS); + Add->takeName(&CI); + Constant *V[] = { + UndefValue::get(LHS->getType()), + ConstantInt::getTrue(II->getContext()) + }; + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); + return InsertValueInst::Create(Struct, Add, 0); + } + + if (LHSKnownPositive && RHSKnownPositive) { + // The sign bit is clear in both cases: this CANNOT overflow. + // Create a simple add instruction, and insert it into the struct. + Value *Add = Builder->CreateNUWAdd(LHS, RHS); + Add->takeName(&CI); + Constant *V[] = { + UndefValue::get(LHS->getType()), + ConstantInt::getFalse(II->getContext()) + }; + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); + return InsertValueInst::Create(Struct, Add, 0); + } + } + } + // FALL THROUGH uadd into sadd + case Intrinsic::sadd_with_overflow: + // Canonicalize constants into the RHS. + if (isa<Constant>(II->getArgOperand(0)) && + !isa<Constant>(II->getArgOperand(1))) { + Value *LHS = II->getArgOperand(0); + II->setArgOperand(0, II->getArgOperand(1)); + II->setArgOperand(1, LHS); + return II; + } + + // X + undef -> undef + if (isa<UndefValue>(II->getArgOperand(1))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + // X + 0 -> {X, false} + if (RHS->isZero()) { + Constant *V[] = { + UndefValue::get(II->getArgOperand(0)->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); + } + } + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + // undef - X -> undef + // X - undef -> undef + if (isa<UndefValue>(II->getArgOperand(0)) || + isa<UndefValue>(II->getArgOperand(1))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + // X - 0 -> {X, false} + if (RHS->isZero()) { + Constant *V[] = { + UndefValue::get(II->getArgOperand(0)->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); + } + } + break; + case Intrinsic::umul_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); + + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + + // Get the largest possible values for each operand. + APInt LHSMax = ~LHSKnownZero; + APInt RHSMax = ~RHSKnownZero; + + // If multiplying the maximum values does not overflow then we can turn + // this into a plain NUW mul. + bool Overflow; + LHSMax.umul_ov(RHSMax, Overflow); + if (!Overflow) { + Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); + Constant *V[] = { + UndefValue::get(LHS->getType()), + Builder->getFalse() + }; + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V); + return InsertValueInst::Create(Struct, Mul, 0); + } + } // FALL THROUGH + case Intrinsic::smul_with_overflow: + // Canonicalize constants into the RHS. + if (isa<Constant>(II->getArgOperand(0)) && + !isa<Constant>(II->getArgOperand(1))) { + Value *LHS = II->getArgOperand(0); + II->setArgOperand(0, II->getArgOperand(1)); + II->setArgOperand(1, LHS); + return II; + } + + // X * undef -> undef + if (isa<UndefValue>(II->getArgOperand(1))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + // X*0 -> {0, false} + if (RHSI->isZero()) + return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + + // X * 1 -> {X, false} + if (RHSI->equalsInt(1)) { + Constant *V[] = { + UndefValue::get(II->getArgOperand(0)->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); + } + } + break; + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + // Turn PPC lvx -> load if the pointer is known aligned. + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); + return new LoadInst(Ptr); + } + break; + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + // Turn stvx -> store if the pointer is known aligned. + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { + Type *OpPtrTy = + PointerType::getUnqual(II->getArgOperand(0)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + return new StoreInst(II->getArgOperand(0), Ptr); + } + break; + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + // Turn X86 storeu -> store if the pointer is known aligned. + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { + Type *OpPtrTy = + PointerType::getUnqual(II->getArgOperand(1)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); + return new StoreInst(II->getArgOperand(1), Ptr); + } + break; + + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: { + // These intrinsics only demand the 0th element of their input vectors. If + // we can simplify the input based on that, do so now. + unsigned VWidth = + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); + APInt DemandedElts(VWidth, 1); + APInt UndefElts(VWidth, 0); + if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), + DemandedElts, UndefElts)) { + II->setArgOperand(0, V); + return II; + } + break; + } + + + case Intrinsic::x86_sse41_pmovsxbw: + case Intrinsic::x86_sse41_pmovsxwd: + case Intrinsic::x86_sse41_pmovsxdq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxdq: { + // pmov{s|z}x ignores the upper half of their input vectors. + unsigned VWidth = + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); + unsigned LowHalfElts = VWidth / 2; + APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); + APInt UndefElts(VWidth, 0); + if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), + InputDemandedElts, + UndefElts)) { + II->setArgOperand(0, TmpV); + return II; + } + break; + } + + case Intrinsic::ppc_altivec_vperm: + // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { + assert(Mask->getType()->getVectorNumElements() == 16 && + "Bad type for intrinsic!"); + + // Check that all of the elements are integer constants or undefs. + bool AllEltsOk = true; + for (unsigned i = 0; i != 16; ++i) { + Constant *Elt = Mask->getAggregateElement(i); + if (Elt == 0 || + !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { + AllEltsOk = false; + break; + } + } + + if (AllEltsOk) { + // Cast the input vectors to byte vectors. + Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), + Mask->getType()); + Value *Result = UndefValue::get(Op0->getType()); + + // Only extract each element once. + Value *ExtractedElts[32]; + memset(ExtractedElts, 0, sizeof(ExtractedElts)); + + for (unsigned i = 0; i != 16; ++i) { + if (isa<UndefValue>(Mask->getAggregateElement(i))) + continue; + unsigned Idx = + cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); + Idx &= 31; // Match the hardware behavior. + + if (ExtractedElts[Idx] == 0) { + ExtractedElts[Idx] = + Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + Builder->getInt32(Idx&15)); + } + + // Insert this value into the result vector. + Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], + Builder->getInt32(i)); + } + return CastInst::Create(Instruction::BitCast, Result, CI.getType()); + } + } + break; + + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD); + unsigned AlignArg = II->getNumArgOperands() - 1; + ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); + if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { + II->setArgOperand(AlignArg, + ConstantInt::get(Type::getInt32Ty(II->getContext()), + MemAlign, false)); + return II; + } + break; + } + + case Intrinsic::arm_neon_vmulls: + case Intrinsic::arm_neon_vmullu: { + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + + // Handle mul by zero first: + if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) { + return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType())); + } + + // Check for constant LHS & RHS - in this case we just simplify. + bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu); + VectorType *NewVT = cast<VectorType>(II->getType()); + unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth(); + if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) { + if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) { + VectorType* VT = cast<VectorType>(CV0->getType()); + SmallVector<Constant*, 4> NewElems; + for (unsigned i = 0; i < VT->getNumElements(); ++i) { + APInt CV0E = + (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue(); + CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth); + APInt CV1E = + (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue(); + CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth); + NewElems.push_back( + ConstantInt::get(NewVT->getElementType(), CV0E * CV1E)); + } + return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems)); + } + + // Couldn't simplify - cannonicalize constant to the RHS. + std::swap(Arg0, Arg1); + } + + // Handle mul by one: + if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) { + if (ConstantInt *Splat = + dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) { + if (Splat->isOne()) { + if (Zext) + return CastInst::CreateZExtOrBitCast(Arg0, II->getType()); + // else + return CastInst::CreateSExtOrBitCast(Arg0, II->getType()); + } + } + } + + break; + } + + case Intrinsic::stackrestore: { + // If the save is right next to the restore, remove the restore. This can + // happen when variable allocas are DCE'd. + if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { + if (SS->getIntrinsicID() == Intrinsic::stacksave) { + BasicBlock::iterator BI = SS; + if (&*++BI == II) + return EraseInstFromFunction(CI); + } + } + + // Scan down this block to see if there is another stack restore in the + // same block without an intervening call/alloca. + BasicBlock::iterator BI = II; + TerminatorInst *TI = II->getParent()->getTerminator(); + bool CannotRemove = false; + for (++BI; &*BI != TI; ++BI) { + if (isa<AllocaInst>(BI)) { + CannotRemove = true; + break; + } + if (CallInst *BCI = dyn_cast<CallInst>(BI)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) { + // If there is a stackrestore below this one, remove this one. + if (II->getIntrinsicID() == Intrinsic::stackrestore) + return EraseInstFromFunction(CI); + // Otherwise, ignore the intrinsic. + } else { + // If we found a non-intrinsic call, we can't remove the stack + // restore. + CannotRemove = true; + break; + } + } + } + + // If the stack restore is in a return, resume, or unwind block and if there + // are no allocas or calls between the restore and the return, nuke the + // restore. + if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI))) + return EraseInstFromFunction(CI); + break; + } + } + + return visitCallSite(II); +} + +// InvokeInst simplification +// +Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { + return visitCallSite(&II); +} + +/// isSafeToEliminateVarargsCast - If this cast does not affect the value +/// passed through the varargs area, we can eliminate the use of the cast. +static bool isSafeToEliminateVarargsCast(const CallSite CS, + const CastInst * const CI, + const DataLayout * const TD, + const int ix) { + if (!CI->isLosslessCast()) + return false; + + // The size of ByVal arguments is derived from the type, so we + // can't change to a type with a different size. If the size were + // passed explicitly we could avoid this check. + if (!CS.isByValArgument(ix)) + return true; + + Type* SrcTy = + cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); + Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); + if (!SrcTy->isSized() || !DstTy->isSized()) + return false; + if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) + return false; + return true; +} + +// Try to fold some different type of calls here. +// Currently we're only working with the checking functions, memcpy_chk, +// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, +// strcat_chk and strncat_chk. +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) { + if (CI->getCalledFunction() == 0) return 0; + + if (Value *With = Simplifier->optimizeCall(CI)) { + ++NumSimplified; + return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With); + } + + return 0; +} + +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa<AllocaInst>(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. + if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. + if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + +// visitCallSite - Improvements for call and invoke instructions. +// +Instruction *InstCombiner::visitCallSite(CallSite CS) { + if (isAllocLikeFn(CS.getInstruction(), TLI)) + return visitAllocSite(*CS.getInstruction()); + + bool Changed = false; + + // If the callee is a pointer to a function, attempt to move any casts to the + // arguments of the call/invoke. + Value *Callee = CS.getCalledValue(); + if (!isa<Function>(Callee) && transformConstExprCastCall(CS)) + return 0; + + if (Function *CalleeF = dyn_cast<Function>(Callee)) + // If the call and callee calling conventions don't match, this call must + // be unreachable, as the call is undefined. + if (CalleeF->getCallingConv() != CS.getCallingConv() && + // Only do this for calls to a function with a body. A prototype may + // not actually end up matching the implementation's calling conv for a + // variety of reasons (e.g. it may be written in assembly). + !CalleeF->isDeclaration()) { + Instruction *OldCall = CS.getInstruction(); + new StoreInst(ConstantInt::getTrue(Callee->getContext()), + UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), + OldCall); + // If OldCall does not return void then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!OldCall->getType()->isVoidTy()) + ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); + if (isa<CallInst>(OldCall)) + return EraseInstFromFunction(*OldCall); + + // We cannot remove an invoke, because it would change the CFG, just + // change the callee to a null pointer. + cast<InvokeInst>(OldCall)->setCalledFunction( + Constant::getNullValue(CalleeF->getType())); + return 0; + } + + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + // If CS does not return void then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!CS.getInstruction()->getType()->isVoidTy()) + ReplaceInstUsesWith(*CS.getInstruction(), + UndefValue::get(CS.getInstruction()->getType())); + + if (isa<InvokeInst>(CS.getInstruction())) { + // Can't remove an invoke because we cannot change the CFG. + return 0; + } + + // This instruction is not reachable, just remove it. We insert a store to + // undef so that we know that this code is not reachable, despite the fact + // that we can't modify the CFG here. + new StoreInst(ConstantInt::getTrue(Callee->getContext()), + UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), + CS.getInstruction()); + + return EraseInstFromFunction(*CS.getInstruction()); + } + + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); + + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + if (FTy->isVarArg()) { + int ix = FTy->getNumParams(); + // See if we can optimize any arguments passed through the varargs area of + // the call. + for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(), + E = CS.arg_end(); I != E; ++I, ++ix) { + CastInst *CI = dyn_cast<CastInst>(*I); + if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) { + *I = CI->getOperand(0); + Changed = true; + } + } + } + + if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) { + // Inline asm calls cannot throw - mark them 'nounwind'. + CS.setDoesNotThrow(); + Changed = true; + } + + // Try to optimize the call if possible, we require DataLayout for most of + // this. None of these calls are seen as possibly dead so go ahead and + // delete the instruction now. + if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + Instruction *I = tryOptimizeCall(CI, TD); + // If we changed something return the result, etc. Otherwise let + // the fallthrough check. + if (I) return EraseInstFromFunction(*I); + } + + return Changed ? CS.getInstruction() : 0; +} + +// transformConstExprCastCall - If the callee is a constexpr cast of a function, +// attempt to move the cast to the arguments of the call/invoke. +// +bool InstCombiner::transformConstExprCastCall(CallSite CS) { + Function *Callee = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (Callee == 0) + return false; + Instruction *Caller = CS.getInstruction(); + const AttributeSet &CallerPAL = CS.getAttributes(); + + // Okay, this is a cast from a function to a different type. Unless doing so + // would cause a type conversion of one of our arguments, change this call to + // be a direct call with arguments casted to the appropriate types. + // + FunctionType *FT = Callee->getFunctionType(); + Type *OldRetTy = Caller->getType(); + Type *NewRetTy = FT->getReturnType(); + + if (NewRetTy->isStructTy()) + return false; // TODO: Handle multiple return values. + + // Check to see if we are changing the return type... + if (OldRetTy != NewRetTy) { + if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) { + if (Callee->isDeclaration()) + return false; // Cannot transform this return value. + + if (!Caller->use_empty() && + // void -> non-void is handled specially + !NewRetTy->isVoidTy()) + return false; // Cannot transform this return value. + } + + if (!CallerPAL.isEmpty() && !Caller->use_empty()) { + AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); + if (RAttrs. + hasAttributes(AttributeFuncs:: + typeIncompatible(NewRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex)) + return false; // Attribute not compatible with transformed value. + } + + // If the callsite is an invoke instruction, and the return value is used by + // a PHI node in a successor, we cannot change the return type of the call + // because there is no place to put the cast instruction (without breaking + // the critical edge). Bail out in this case. + if (!Caller->use_empty()) + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (PHINode *PN = dyn_cast<PHINode>(*UI)) + if (PN->getParent() == II->getNormalDest() || + PN->getParent() == II->getUnwindDest()) + return false; + } + + unsigned NumActualArgs = CS.arg_size(); + unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); + + CallSite::arg_iterator AI = CS.arg_begin(); + for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { + Type *ParamTy = FT->getParamType(i); + Type *ActTy = (*AI)->getType(); + + if (!CastInst::isBitCastable(ActTy, ParamTy)) + return false; // Cannot transform this parameter value. + + if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). + hasAttributes(AttributeFuncs:: + typeIncompatible(ParamTy, i + 1), i + 1)) + return false; // Attribute not compatible with transformed value. + + // If the parameter is passed as a byval argument, then we have to have a + // sized type and the sized type has to have the same size as the old type. + if (ParamTy != ActTy && + CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, + Attribute::ByVal)) { + PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); + if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) + return false; + + Type *CurElTy = ActTy->getPointerElementType(); + if (TD->getTypeAllocSize(CurElTy) != + TD->getTypeAllocSize(ParamPTy->getElementType())) + return false; + } + } + + if (Callee->isDeclaration()) { + // Do not delete arguments unless we have a function body. + if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) + return false; + + // If the callee is just a declaration, don't change the varargsness of the + // call. We don't want to introduce a varargs call where one doesn't + // already exist. + PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); + if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) + return false; + + // If both the callee and the cast type are varargs, we still have to make + // sure the number of fixed parameters are the same or we have the same + // ABI issues as if we introduce a varargs call. + if (FT->isVarArg() && + cast<FunctionType>(APTy->getElementType())->isVarArg() && + FT->getNumParams() != + cast<FunctionType>(APTy->getElementType())->getNumParams()) + return false; + } + + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && + !CallerPAL.isEmpty()) + // In this case we have more arguments than the new function type, but we + // won't be dropping them. Check that these extra arguments have attributes + // that are compatible with being a vararg call argument. + for (unsigned i = CallerPAL.getNumSlots(); i; --i) { + unsigned Index = CallerPAL.getSlotIndex(i - 1); + if (Index <= FT->getNumParams()) + break; + + // Check if it has an attribute that's incompatible with varargs. + AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1); + if (PAttrs.hasAttribute(Index, Attribute::StructRet)) + return false; + } + + + // Okay, we decided that this is a safe thing to do: go ahead and start + // inserting cast instructions as necessary. + std::vector<Value*> Args; + Args.reserve(NumActualArgs); + SmallVector<AttributeSet, 8> attrVec; + attrVec.reserve(NumCommonArgs); + + // Get any return attributes. + AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); + + // If the return value is not being used, the type may not be compatible + // with the existing attributes. Wipe out any problematic attributes. + RAttrs. + removeAttributes(AttributeFuncs:: + typeIncompatible(NewRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex); + + // Add the new return attributes. + if (RAttrs.hasAttributes()) + attrVec.push_back(AttributeSet::get(Caller->getContext(), + AttributeSet::ReturnIndex, RAttrs)); + + AI = CS.arg_begin(); + for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { + Type *ParamTy = FT->getParamType(i); + + if ((*AI)->getType() == ParamTy) { + Args.push_back(*AI); + } else { + Args.push_back(Builder->CreateBitCast(*AI, ParamTy)); + } + + // Add any parameter attributes. + AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); + if (PAttrs.hasAttributes()) + attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1, + PAttrs)); + } + + // If the function takes more arguments than the call was taking, add them + // now. + for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) + Args.push_back(Constant::getNullValue(FT->getParamType(i))); + + // If we are removing arguments to the function, emit an obnoxious warning. + if (FT->getNumParams() < NumActualArgs) { + // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 + if (FT->isVarArg()) { + // Add all of the arguments in their promoted form to the arg list. + for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { + Type *PTy = getPromotedType((*AI)->getType()); + if (PTy != (*AI)->getType()) { + // Must promote to pass through va_arg area! + Instruction::CastOps opcode = + CastInst::getCastOpcode(*AI, false, PTy, false); + Args.push_back(Builder->CreateCast(opcode, *AI, PTy)); + } else { + Args.push_back(*AI); + } + + // Add any parameter attributes. + AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); + if (PAttrs.hasAttributes()) + attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1, + PAttrs)); + } + } + } + + AttributeSet FnAttrs = CallerPAL.getFnAttributes(); + if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex)) + attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs)); + + if (NewRetTy->isVoidTy()) + Caller->setName(""); // Void type should not have a name. + + const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(), + attrVec); + + Instruction *NC; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NC = Builder->CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args); + NC->takeName(II); + cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); + } else { + CallInst *CI = cast<CallInst>(Caller); + NC = Builder->CreateCall(Callee, Args); + NC->takeName(CI); + if (CI->isTailCall()) + cast<CallInst>(NC)->setTailCall(); + cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); + cast<CallInst>(NC)->setAttributes(NewCallerPAL); + } + + // Insert a cast of the return type as necessary. + Value *NV = NC; + if (OldRetTy != NV->getType() && !Caller->use_empty()) { + if (!NV->getType()->isVoidTy()) { + NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy); + NC->setDebugLoc(Caller->getDebugLoc()); + + // If this is an invoke instruction, we should insert it after the first + // non-phi, instruction in the normal successor block. + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt(); + InsertNewInstBefore(NC, *I); + } else { + // Otherwise, it's a call, just insert cast right after the call. + InsertNewInstBefore(NC, *Caller); + } + Worklist.AddUsersToWorkList(*Caller); + } else { + NV = UndefValue::get(Caller->getType()); + } + } + + if (!Caller->use_empty()) + ReplaceInstUsesWith(*Caller, NV); + + EraseInstFromFunction(*Caller); + return true; +} + +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. +// +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { + Value *Callee = CS.getCalledValue(); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + const AttributeSet &Attrs = CS.getAttributes(); + + // If the call already has the 'nest' attribute somewhere then give up - + // otherwise 'nest' would occur twice after splicing in the chain. + if (Attrs.hasAttrSomewhere(Attribute::Nest)) + return 0; + + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); + + Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); + PointerType *NestFPTy = cast<PointerType>(NestF->getType()); + FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); + + const AttributeSet &NestAttrs = NestF->getAttributes(); + if (!NestAttrs.isEmpty()) { + unsigned NestIdx = 1; + Type *NestTy = 0; + AttributeSet NestAttr; + + // Look for a parameter marked with the 'nest' attribute. + for (FunctionType::param_iterator I = NestFTy->param_begin(), + E = NestFTy->param_end(); I != E; ++NestIdx, ++I) + if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) { + // Record the parameter type and any other attributes. + NestTy = *I; + NestAttr = NestAttrs.getParamAttributes(NestIdx); + break; + } + + if (NestTy) { + Instruction *Caller = CS.getInstruction(); + std::vector<Value*> NewArgs; + NewArgs.reserve(CS.arg_size() + 1); + + SmallVector<AttributeSet, 8> NewAttrs; + NewAttrs.reserve(Attrs.getNumSlots() + 1); + + // Insert the nest argument into the call argument list, which may + // mean appending it. Likewise for attributes. + + // Add any result attributes. + if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + Attrs.getRetAttributes())); + + { + unsigned Idx = 1; + CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + do { + if (Idx == NestIdx) { + // Add the chain argument and attributes. + Value *NestVal = Tramp->getArgOperand(2); + if (NestVal->getType() != NestTy) + NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); + NewArgs.push_back(NestVal); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + NestAttr)); + } + + if (I == E) + break; + + // Add the original argument and attributes. + NewArgs.push_back(*I); + AttributeSet Attr = Attrs.getParamAttributes(Idx); + if (Attr.hasAttributes(Idx)) { + AttrBuilder B(Attr, Idx); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + Idx + (Idx >= NestIdx), B)); + } + + ++Idx, ++I; + } while (1); + } + + // Add any function attributes. + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) + NewAttrs.push_back(AttributeSet::get(FTy->getContext(), + Attrs.getFnAttributes())); + + // The trampoline may have been bitcast to a bogus type (FTy). + // Handle this by synthesizing a new function type, equal to FTy + // with the chain parameter inserted. + + std::vector<Type*> NewTypes; + NewTypes.reserve(FTy->getNumParams()+1); + + // Insert the chain's type into the list of parameter types, which may + // mean appending it. + { + unsigned Idx = 1; + FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); + + do { + if (Idx == NestIdx) + // Add the chain's type. + NewTypes.push_back(NestTy); + + if (I == E) + break; + + // Add the original type. + NewTypes.push_back(*I); + + ++Idx, ++I; + } while (1); + } + + // Replace the trampoline call with a direct call. Let the generic + // code sort out any function type mismatches. + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, + FTy->isVarArg()); + Constant *NewCallee = + NestF->getType() == PointerType::getUnqual(NewFTy) ? + NestF : ConstantExpr::getBitCast(NestF, + PointerType::getUnqual(NewFTy)); + const AttributeSet &NewPAL = + AttributeSet::get(FTy->getContext(), NewAttrs); + + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NewCaller = InvokeInst::Create(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs); + cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); + } else { + NewCaller = CallInst::Create(NewCallee, NewArgs); + if (cast<CallInst>(Caller)->isTailCall()) + cast<CallInst>(NewCaller)->setTailCall(); + cast<CallInst>(NewCaller)-> + setCallingConv(cast<CallInst>(Caller)->getCallingConv()); + cast<CallInst>(NewCaller)->setAttributes(NewPAL); + } + + return NewCaller; + } + } + + // Replace the trampoline call with a direct call. Since there is no 'nest' + // parameter, there is no need to adjust the argument list. Let the generic + // code sort out any function type mismatches. + Constant *NewCallee = + NestF->getType() == PTy ? NestF : + ConstantExpr::getBitCast(NestF, PTy); + CS.setCalledFunction(NewCallee); + return CS.getInstruction(); +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp new file mode 100644 index 000000000000..72377dc0adca --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -0,0 +1,1862 @@ +//===- InstCombineCasts.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for cast operations. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" +using namespace llvm; +using namespace PatternMatch; + +/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear +/// expression. If so, decompose it, returning some value X, such that Val is +/// X*Scale+Offset. +/// +static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, + uint64_t &Offset) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + Offset = CI->getZExtValue(); + Scale = 0; + return ConstantInt::get(Val->getType(), 0); + } + + if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { + // Cannot look past anything that might overflow. + OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); + if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) { + Scale = 1; + Offset = 0; + return Val; + } + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (I->getOpcode() == Instruction::Shl) { + // This is a value scaled by '1 << the shift amt'. + Scale = UINT64_C(1) << RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } + + if (I->getOpcode() == Instruction::Mul) { + // This value is scaled by 'RHS'. + Scale = RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } + + if (I->getOpcode() == Instruction::Add) { + // We have X+C. Check to see if we really have (X*C2)+C1, + // where C1 is divisible by C2. + unsigned SubScale; + Value *SubVal = + DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + Offset += RHS->getZExtValue(); + Scale = SubScale; + return SubVal; + } + } + } + + // Otherwise, we can't look past this. + Scale = 1; + Offset = 0; + return Val; +} + +/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, +/// try to eliminate the cast by moving the type information into the alloc. +Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, + AllocaInst &AI) { + // This requires DataLayout to get the alloca alignment and size information. + if (!TD) return 0; + + PointerType *PTy = cast<PointerType>(CI.getType()); + + BuilderTy AllocaBuilder(*Builder); + AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + + // Get the type really allocated and the type casted to. + Type *AllocElTy = AI.getAllocatedType(); + Type *CastElTy = PTy->getElementType(); + if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + + unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); + unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); + if (CastElTyAlign < AllocElTyAlign) return 0; + + // If the allocation has multiple uses, only promote it if we are strictly + // increasing the alignment of the resultant allocation. If we keep it the + // same, we open the door to infinite loops of various kinds. + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; + + uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); + uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); + if (CastElTySize == 0 || AllocElTySize == 0) return 0; + + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + + // See if we can satisfy the modulus by pulling a scale out of the array + // size argument. + unsigned ArraySizeScale; + uint64_t ArrayOffset; + Value *NumElements = // See if the array size is a decomposable linear expr. + DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); + + // If we can now satisfy the modulus, by using a non-1 scale, we really can + // do the xform. + if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + + unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; + Value *Amt = 0; + if (Scale == 1) { + Amt = NumElements; + } else { + Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale); + // Insert before the alloca, not before the cast. + Amt = AllocaBuilder.CreateMul(Amt, NumElements); + } + + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { + Value *Off = ConstantInt::get(AI.getArraySize()->getType(), + Offset, true); + Amt = AllocaBuilder.CreateAdd(Amt, Off); + } + + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); + New->setAlignment(AI.getAlignment()); + New->takeName(&AI); + + // If the allocation has multiple real uses, insert a cast and change all + // things that used it to use the new cast. This will also hack on CI, but it + // will die soon. + if (!AI.hasOneUse()) { + // New is the allocation instruction, pointer typed. AI is the original + // allocation instruction, also pointer typed. Thus, cast to use is BitCast. + Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); + ReplaceInstUsesWith(AI, NewCast); + } + return ReplaceInstUsesWith(CI, New); +} + +/// EvaluateInDifferentType - Given an expression that +/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually +/// insert the code to evaluate the expression. +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, + bool isSigned) { + if (Constant *C = dyn_cast<Constant>(V)) { + C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); + // If we got a constantexpr back, try to simplify it with TD info. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + C = ConstantFoldConstantExpression(CE, TD, TLI); + return C; + } + + // Otherwise, it must be an instruction. + Instruction *I = cast<Instruction>(V); + Instruction *Res = 0; + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::Shl: + case Instruction::UDiv: + case Instruction::URem: { + Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); + Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // If the source type of the cast is the type we're trying for then we can + // just return the source. There's no need to insert it because it is not + // new. + if (I->getOperand(0)->getType() == Ty) + return I->getOperand(0); + + // Otherwise, must be the same type of cast, so just reinsert a new one. + // This also handles the case of zext(trunc(x)) -> zext(x). + Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, + Opc == Instruction::SExt); + break; + case Instruction::Select: { + Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); + Res = SelectInst::Create(I->getOperand(0), True, False); + break; + } + case Instruction::PHI: { + PHINode *OPN = cast<PHINode>(I); + PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); + for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { + Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + NPN->addIncoming(V, OPN->getIncomingBlock(i)); + } + Res = NPN; + break; + } + default: + // TODO: Can handle more cases here. + llvm_unreachable("Unreachable!"); + } + + Res->takeName(I); + return InsertNewInstWith(Res, *I); +} + + +/// This function is a wrapper around CastInst::isEliminableCastPair. It +/// simply extracts arguments and returns what that function returns. +static Instruction::CastOps +isEliminableCastPair( + const CastInst *CI, ///< The first cast instruction + unsigned opcode, ///< The opcode of the second cast instruction + Type *DstTy, ///< The target type for the second cast instruction + DataLayout *TD ///< The target data for pointer size +) { + + Type *SrcTy = CI->getOperand(0)->getType(); // A from above + Type *MidTy = CI->getType(); // B from above + + // Get the opcodes of the two Cast instructions + Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); + Instruction::CastOps secondOp = Instruction::CastOps(opcode); + Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(SrcTy) : 0; + Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(MidTy) : 0; + Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(DstTy) : 0; + unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, + DstTy, SrcIntPtrTy, MidIntPtrTy, + DstIntPtrTy); + + // We don't want to form an inttoptr or ptrtoint that converts to an integer + // type that differs from the pointer size. + if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || + (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) + Res = 0; + + return Instruction::CastOps(Res); +} + +/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually +/// results in any code being generated and is interesting to optimize out. If +/// the cast can be eliminated by some other simple transformation, we prefer +/// to do the simplification first. +bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, + Type *Ty) { + // Noop casts and casts of constants should be eliminated trivially. + if (V->getType() == Ty || isa<Constant>(V)) return false; + + // If this is another cast that can be eliminated, we prefer to have it + // eliminated. + if (const CastInst *CI = dyn_cast<CastInst>(V)) + if (isEliminableCastPair(CI, opc, Ty, TD)) + return false; + + // If this is a vector sext from a compare, then we don't want to break the + // idiom where each element of the extended vector is either zero or all ones. + if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) + return false; + + return true; +} + + +/// @brief Implement the transforms common to all CastInst visitors. +Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { + Value *Src = CI.getOperand(0); + + // Many cases of "cast of a cast" are eliminable. If it's eliminable we just + // eliminate it now. + if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast + if (Instruction::CastOps opc = + isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { + // The first cast (CSrc) is eliminable so we need to fix up or replace + // the second cast (CI). CSrc will then have a good chance of being dead. + return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); + } + } + + // If we are casting a select then fold the cast into the select + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) + if (Instruction *NV = FoldOpIntoSelect(CI, SI)) + return NV; + + // If we are casting a PHI then fold the cast into the PHI + if (isa<PHINode>(Src)) { + // We don't do this if this would create a PHI node with an illegal type if + // it is currently legal. + if (!Src->getType()->isIntegerTy() || + !CI.getType()->isIntegerTy() || + ShouldChangeType(CI.getType(), Src->getType())) + if (Instruction *NV = FoldOpIntoPhi(CI)) + return NV; + } + + return 0; +} + +/// CanEvaluateTruncated - Return true if we can evaluate the specified +/// expression tree as type Ty instead of its larger type, and arrive with the +/// same value. This is used by code that tries to eliminate truncates. +/// +/// Ty will always be a type smaller than V. We should return true if trunc(V) +/// can be computed by computing V in the smaller type. If V is an instruction, +/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only +/// makes sense if x and y can be efficiently truncated. +/// +/// This function works on both vectors and scalars. +/// +static bool CanEvaluateTruncated(Value *V, Type *Ty) { + // We can always evaluate constants in another type. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + Type *OrigTy = V->getType(); + + // If this is an extension from the dest type, we can eliminate it, even if it + // has multiple uses. + if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && + I->getOperand(0)->getType() == Ty) + return true; + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // These operators can all arbitrarily be extended or truncated. + return CanEvaluateTruncated(I->getOperand(0), Ty) && + CanEvaluateTruncated(I->getOperand(1), Ty); + + case Instruction::UDiv: + case Instruction::URem: { + // UDiv and URem can be truncated if all the truncated bits are zero. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (BitWidth < OrigBitWidth) { + APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); + if (MaskedValueIsZero(I->getOperand(0), Mask) && + MaskedValueIsZero(I->getOperand(1), Mask)) { + return CanEvaluateTruncated(I->getOperand(0), Ty) && + CanEvaluateTruncated(I->getOperand(1), Ty); + } + } + break; + } + case Instruction::Shl: + // If we are truncating the result of this SHL, and if it's a shift of a + // constant amount, we can always perform a SHL in a smaller type. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (CI->getLimitedValue(BitWidth) < BitWidth) + return CanEvaluateTruncated(I->getOperand(0), Ty); + } + break; + case Instruction::LShr: + // If this is a truncate of a logical shr, we can truncate it to a smaller + // lshr iff we know that the bits we would otherwise be shifting in are + // already zeros. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateTruncated(I->getOperand(0), Ty); + } + } + break; + case Instruction::Trunc: + // trunc(trunc(x)) -> trunc(x) + return true; + case Instruction::ZExt: + case Instruction::SExt: + // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest + // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest + return true; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + return CanEvaluateTruncated(SI->getTrueValue(), Ty) && + CanEvaluateTruncated(SI->getFalseValue(), Ty); + } + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty)) + return false; + return true; + } + default: + // TODO: Can handle more cases here. + break; + } + + return false; +} + +Instruction *InstCombiner::visitTrunc(TruncInst &CI) { + if (Instruction *Result = commonCastTransforms(CI)) + return Result; + + // See if we can simplify any instructions used by the input whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(CI)) + return &CI; + + Value *Src = CI.getOperand(0); + Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + + // Attempt to truncate the entire input expression tree to the destination + // type. Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && + CanEvaluateTruncated(Src, DestTy)) { + + // If this cast is a truncate, evaluting in a different type always + // eliminates the cast, so it is always a win. + DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid cast: " << CI << '\n'); + Value *Res = EvaluateInDifferentType(Src, DestTy, false); + assert(Res->getType() == DestTy); + return ReplaceInstUsesWith(CI, Res); + } + + // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. + if (DestTy->getScalarSizeInBits() == 1) { + Constant *One = ConstantInt::get(Src->getType(), 1); + Src = Builder->CreateAnd(Src, One); + Value *Zero = Constant::getNullValue(Src->getType()); + return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); + } + + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. + Value *A = 0; ConstantInt *Cst = 0; + if (Src->hasOneUse() && + match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { + // We have three types to worry about here, the type of A, the source of + // the truncate (MidSize), and the destination of the truncate. We know that + // ASize < MidSize and MidSize > ResultSize, but don't know the relation + // between ASize and ResultSize. + unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + + // If the shift amount is larger than the size of A, then the result is + // known to be zero because all the input bits got shifted out. + if (Cst->getZExtValue() >= ASize) + return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType())); + + // Since we're doing an lshr and a zero extend, and know that the shift + // amount is smaller than ASize, it is always safe to do the shift in A's + // type, then zero extend or truncate to the result. + Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), false); + } + + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest + // type isn't non-native. + if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && + ShouldChangeType(Src->getType(), CI.getType()) && + match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { + Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + return BinaryOperator::CreateAnd(NewTrunc, + ConstantExpr::getTrunc(Cst, CI.getType())); + } + + return 0; +} + +/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. +Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, + bool DoXform) { + // If we are just checking for a icmp eq of a single bit and zext'ing it + // to an integer, then shift the bit to the appropriate place and then + // cast to integer to avoid the comparison. + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { + const APInt &Op1CV = Op1C->getValue(); + + // zext (x <s 0) to i32 --> x>>u31 true if signbit set. + // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. + if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) { + if (!DoXform) return ICI; + + Value *In = ICI->getOperand(0); + Value *Sh = ConstantInt::get(In->getType(), + In->getType()->getScalarSizeInBits()-1); + In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); + + if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, In->getName()+".not"); + } + + return ReplaceInstUsesWith(CI, In); + } + + // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. + // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. + // zext (X == 1) to i32 --> X iff X has only the low bit set. + // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set. + // zext (X != 0) to i32 --> X iff X has only the low bit set. + // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. + // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. + // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + // This only works for EQ and NE + ICI->isEquality()) { + // If Op1C some other power of two, convert: + uint32_t BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? + if (!DoXform) return ICI; + + bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; + if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + // (X&4) == 2 --> false + // (X&4) != 2 --> true + Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), + isNE); + Res = ConstantExpr::getZExt(Res, CI.getType()); + return ReplaceInstUsesWith(CI, Res); + } + + uint32_t ShiftAmt = KnownZeroMask.logBase2(); + Value *In = ICI->getOperand(0); + if (ShiftAmt) { + // Perform a logical shr by shiftamt. + // Insert the shift to put the result in the low bit. + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); + } + + if ((Op1CV != 0) == isNE) { // Toggle the low bit. + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + } + } + } + + // icmp ne A, B is equal to xor A, B when A and B only really have one bit. + // It is also profitable to transform icmp eq into not(xor(A, B)) because that + // may lead to additional simplifications. + if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { + if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { + uint32_t BitWidth = ITy->getBitWidth(); + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { + APInt KnownBits = KnownZeroLHS | KnownOneLHS; + APInt UnknownBit = ~KnownBits; + if (UnknownBit.countPopulation() == 1) { + if (!DoXform) return ICI; + + Value *Result = Builder->CreateXor(LHS, RHS); + + // Mask off any bits that are set and won't be shifted away. + if (KnownOneLHS.uge(UnknownBit)) + Result = Builder->CreateAnd(Result, + ConstantInt::get(ITy, UnknownBit)); + + // Shift the bit we're testing down to the lsb. + Result = Builder->CreateLShr( + Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result->takeName(ICI); + return ReplaceInstUsesWith(CI, Result); + } + } + } + } + + return 0; +} + +/// CanEvaluateZExtd - Determine if the specified value can be computed in the +/// specified wider type and produce the same low bits. If not, return false. +/// +/// If this function returns true, it can also return a non-zero number of bits +/// (in BitsToClear) which indicates that the value it computes is correct for +/// the zero extend, but that the additional BitsToClear bits need to be zero'd +/// out. For example, to promote something like: +/// +/// %B = trunc i64 %A to i32 +/// %C = lshr i32 %B, 8 +/// %E = zext i32 %C to i64 +/// +/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be +/// set to 8 to indicate that the promoted value needs to have bits 24-31 +/// cleared in addition to bits 32-63. Since an 'and' will be generated to +/// clear the top bits anyway, doing this has no extra cost. +/// +/// This function works on both vectors and scalars. +static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { + BitsToClear = 0; + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If the input is a truncate from the destination type, we can trivially + // eliminate it. + if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + return true; + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + unsigned Opc = I->getOpcode(), Tmp; + switch (Opc) { + case Instruction::ZExt: // zext(zext(x)) -> zext(x). + case Instruction::SExt: // zext(sext(x)) -> sext(x). + case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x) + return true; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) || + !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp)) + return false; + // These can all be promoted if neither operand has 'bits to clear'. + if (BitsToClear == 0 && Tmp == 0) + return true; + + // If the operation is an AND/OR/XOR and the bits to clear are zero in the + // other side, BitsToClear is ok. + if (Tmp == 0 && + (Opc == Instruction::And || Opc == Instruction::Or || + Opc == Instruction::Xor)) { + // We use MaskedValueIsZero here for generality, but the case we care + // about the most is constant RHS. + unsigned VSize = V->getType()->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(1), + APInt::getHighBitsSet(VSize, BitsToClear))) + return true; + } + + // Otherwise, we don't know how to analyze this BitsToClear case yet. + return false; + + case Instruction::Shl: + // We can promote shl(x, cst) if we can promote x. Since shl overwrites the + // upper bits we can reduce BitsToClear by the shift amount. + if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear)) + return false; + uint64_t ShiftAmt = Amt->getZExtValue(); + BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0; + return true; + } + return false; + case Instruction::LShr: + // We can promote lshr(x, cst) if we can promote x. This requires the + // ultimate 'and' to clear out the high zero bits we're clearing out though. + if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear)) + return false; + BitsToClear += Amt->getZExtValue(); + if (BitsToClear > V->getType()->getScalarSizeInBits()) + BitsToClear = V->getType()->getScalarSizeInBits(); + return true; + } + // Cannot promote variable LSHR. + return false; + case Instruction::Select: + if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) || + !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) || + // TODO: If important, we could handle the case when the BitsToClear are + // known zero in the disagreeing side. + Tmp != BitsToClear) + return false; + return true; + + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear)) + return false; + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) || + // TODO: If important, we could handle the case when the BitsToClear + // are known zero in the disagreeing input. + Tmp != BitsToClear) + return false; + return true; + } + default: + // TODO: Can handle more cases here. + return false; + } +} + +Instruction *InstCombiner::visitZExt(ZExtInst &CI) { + // If this zero extend is only used by a truncate, let the truncate be + // eliminated before we try to optimize this zext. + if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) + return 0; + + // If one of the common conversion will work, do it. + if (Instruction *Result = commonCastTransforms(CI)) + return Result; + + // See if we can simplify any instructions used by the input whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(CI)) + return &CI; + + Value *Src = CI.getOperand(0); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + + // Attempt to extend the entire input expression tree to the destination + // type. Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + unsigned BitsToClear; + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + assert(BitsToClear < SrcTy->getScalarSizeInBits() && + "Unreasonable BitsToClear"); + + // Okay, we can transform this! Insert the new expression now. + DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid zero extend: " << CI); + Value *Res = EvaluateInDifferentType(Src, DestTy, false); + assert(Res->getType() == DestTy); + + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; + uint32_t DestBitSize = DestTy->getScalarSizeInBits(); + + // If the high bits are already filled with zeros, just replace this + // cast with the result. + if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, + DestBitSize-SrcBitsKept))) + return ReplaceInstUsesWith(CI, Res); + + // We need to emit an AND to clear the high bits. + Constant *C = ConstantInt::get(Res->getType(), + APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); + return BinaryOperator::CreateAnd(Res, C); + } + + // If this is a TRUNC followed by a ZEXT then we are dealing with integral + // types and if the sizes are just right we can convert this into a logical + // 'and' which will be much cheaper than the pair of casts. + if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast + // TODO: Subsume this into EvaluateInDifferentType. + + // Get the sizes of the types involved. We know that the intermediate type + // will be smaller than A or C, but don't know the relation between A and C. + Value *A = CSrc->getOperand(0); + unsigned SrcSize = A->getType()->getScalarSizeInBits(); + unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); + // If we're actually extending zero bits, then if + // SrcSize < DstSize: zext(a & mask) + // SrcSize == DstSize: a & mask + // SrcSize > DstSize: trunc(a) & mask + if (SrcSize < DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + return new ZExtInst(And, CI.getType()); + } + + if (SrcSize == DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), + AndValue)); + } + if (SrcSize > DstSize) { + Value *Trunc = Builder->CreateTrunc(A, CI.getType()); + APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); + return BinaryOperator::CreateAnd(Trunc, + ConstantInt::get(Trunc->getType(), + AndValue)); + } + } + + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformZExtICmp(ICI, CI); + + BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src); + if (SrcI && SrcI->getOpcode() == Instruction::Or) { + // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one + // of the (zext icmp) will be transformed. + ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); + ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); + if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && + (transformZExtICmp(LHS, CI, false) || + transformZExtICmp(RHS, CI, false))) { + Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); + return BinaryOperator::Create(Instruction::Or, LCast, RCast); + } + } + + // zext(trunc(t) & C) -> (t & zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) + return + BinaryOperator::CreateAnd(TI0, + ConstantExpr::getZExt(C, CI.getType())); + } + + // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0))) + if (And->getOpcode() == Instruction::And && And->hasOneUse() && + And->getOperand(1) == C) + if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) { + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Value *NewAnd = Builder->CreateAnd(TI0, ZC); + return BinaryOperator::CreateXor(NewAnd, ZC); + } + } + + // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 + Value *X; + if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) && + match(SrcI, m_Not(m_Value(X))) && + (!X->hasOneUse() || !isa<CmpInst>(X))) { + Value *New = Builder->CreateZExt(X, CI.getType()); + return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); + } + + return 0; +} + +/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. +Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { + Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative + // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { + + Value *Sh = ConstantInt::get(Op0->getType(), + Op0->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + + // If we know that only one bit of the LHS of the icmp can be set and we + // have an equality comparison with zero or a power of 2, we can transform + // the icmp and sext into bitwise/integer operations. + if (ICI->hasOneUse() && + ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ + unsigned BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { + Value *In = ICI->getOperand(0); + + // If the icmp tests for a known zero bit we can constant fold it. + if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { + Value *V = Pred == ICmpInst::ICMP_NE ? + ConstantInt::getAllOnesValue(CI.getType()) : + ConstantInt::getNullValue(CI.getType()); + return ReplaceInstUsesWith(CI, V); + } + + if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { + // sext ((x & 2^n) == 0) -> (x >> n) - 1 + // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 + unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); + // Perform a right shift to place the desired bit in the LSB. + if (ShiftAmt) + In = Builder->CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // At this point "In" is either 1 or 0. Subtract 1 to turn + // {1, 0} -> {0, -1}. + In = Builder->CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); + } else { + // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 + // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 + unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); + // Perform a left shift to place the desired bit in the MSB. + if (ShiftAmt) + In = Builder->CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // Distribute the bit over the whole bit width. + In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), + BitWidth - 1), "sext"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/); + } + } + } + + // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. + if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && + Op0->getType() == CI.getType()) { + Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. + Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + + return 0; +} + +/// CanEvaluateSExtd - Return true if we can take the specified value +/// and return it as type Ty without inserting any new casts and without +/// changing the value of the common low bits. This is used by code that tries +/// to promote integer operations to a wider types will allow us to eliminate +/// the extension. +/// +/// This function works on both vectors and scalars. +/// +static bool CanEvaluateSExtd(Value *V, Type *Ty) { + assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && + "Can't sign extend type to a smaller type"); + // If this is a constant, it can be trivially promoted. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If this is a truncate from the dest type, we can trivially eliminate it. + if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + return true; + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + switch (I->getOpcode()) { + case Instruction::SExt: // sext(sext(x)) -> sext(x) + case Instruction::ZExt: // sext(zext(x)) -> zext(x) + case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x) + return true; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // These operators can all arbitrarily be extended if their inputs can. + return CanEvaluateSExtd(I->getOperand(0), Ty) && + CanEvaluateSExtd(I->getOperand(1), Ty); + + //case Instruction::Shl: TODO + //case Instruction::LShr: TODO + + case Instruction::Select: + return CanEvaluateSExtd(I->getOperand(1), Ty) && + CanEvaluateSExtd(I->getOperand(2), Ty); + + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false; + return true; + } + default: + // TODO: Can handle more cases here. + break; + } + + return false; +} + +Instruction *InstCombiner::visitSExt(SExtInst &CI) { + // If this sign extend is only used by a truncate, let the truncate be + // eliminated before we try to optimize this sext. + if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) + return 0; + + if (Instruction *I = commonCastTransforms(CI)) + return I; + + // See if we can simplify any instructions used by the input whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(CI)) + return &CI; + + Value *Src = CI.getOperand(0); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + + // Attempt to extend the entire input expression tree to the destination + // type. Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && + CanEvaluateSExtd(Src, DestTy)) { + // Okay, we can transform this! Insert the new expression now. + DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid sign extend: " << CI); + Value *Res = EvaluateInDifferentType(Src, DestTy, true); + assert(Res->getType() == DestTy); + + uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); + uint32_t DestBitSize = DestTy->getScalarSizeInBits(); + + // If the high bits are already filled with sign bit, just replace this + // cast with the result. + if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) + return ReplaceInstUsesWith(CI, Res); + + // We need to emit a shl + ashr to do the sign extend. + Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); + return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), + ShAmt); + } + + // If this input is a trunc from our destination, then turn sext(trunc(x)) + // into shifts. + if (TruncInst *TI = dyn_cast<TruncInst>(Src)) + if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) { + uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); + uint32_t DestBitSize = DestTy->getScalarSizeInBits(); + + // We need to emit a shl + ashr to do the sign extend. + Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); + Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); + return BinaryOperator::CreateAShr(Res, ShAmt); + } + + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformSExtICmp(ICI, CI); + + // If the input is a shl/ashr pair of a same constant, then this is a sign + // extension from a smaller value. If we could trust arbitrary bitwidth + // integers, we could turn this into a truncate to the smaller bit and then + // use a sext for the whole extension. Since we don't, look deeper and check + // for a truncate. If the source and dest are the same type, eliminate the + // trunc and extend and just do shifts. For example, turn: + // %a = trunc i32 %i to i8 + // %b = shl i8 %a, 6 + // %c = ashr i8 %b, 6 + // %d = sext i8 %c to i32 + // into: + // %a = shl i32 %i, 30 + // %d = ashr i32 %a, 30 + Value *A = 0; + // TODO: Eventually this could be subsumed by EvaluateInDifferentType. + ConstantInt *BA = 0, *CA = 0; + if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)), + m_ConstantInt(CA))) && + BA == CA && A->getType() == CI.getType()) { + unsigned MidSize = Src->getType()->getScalarSizeInBits(); + unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); + unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; + Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); + A = Builder->CreateShl(A, ShAmtV, CI.getName()); + return BinaryOperator::CreateAShr(A, ShAmtV); + } + + return 0; +} + + +/// FitsInFPType - Return a Constant* for the specified FP constant if it fits +/// in the specified FP type without changing its value. +static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { + bool losesInfo; + APFloat F = CFP->getValueAPF(); + (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); + if (!losesInfo) + return ConstantFP::get(CFP->getContext(), F); + return 0; +} + +/// LookThroughFPExtensions - If this is an fp extension instruction, look +/// through it until we get the source value. +static Value *LookThroughFPExtensions(Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (I->getOpcode() == Instruction::FPExt) + return LookThroughFPExtensions(I->getOperand(0)); + + // If this value is a constant, return the constant in the smallest FP type + // that can accurately represent it. This allows us to turn + // (float)((double)X+2.0) into x+2.0f. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { + if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) + return V; // No constant folding of this. + // See if the value can be truncated to half and then reextended. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf)) + return V; + // See if the value can be truncated to float and then reextended. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) + return V; + if (CFP->getType()->isDoubleTy()) + return V; // Won't shrink. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) + return V; + // Don't try to shrink to various long double types. + } + + return V; +} + +Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { + if (Instruction *I = commonCastTransforms(CI)) + return I; + + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are + // smaller than the destination type, we can eliminate the truncate by doing + // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well + // as many builtins (sqrt, etc). + BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0)); + if (OpI && OpI->hasOneUse()) { + switch (OpI->getOpcode()) { + default: break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + Type *SrcTy = OpI->getType(); + Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); + Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); + if (LHSTrunc->getType() != SrcTy && + RHSTrunc->getType() != SrcTy) { + unsigned DstSize = CI.getType()->getScalarSizeInBits(); + // If the source types were both smaller than the destination type of + // the cast, do this xform. + if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && + RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { + LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); + RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); + return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); + } + } + break; + } + + // (fptrunc (fneg x)) -> (fneg (fptrunc x)) + if (BinaryOperator::isFNeg(OpI)) { + Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), + CI.getType()); + return BinaryOperator::CreateFNeg(InnerTrunc); + } + } + + // (fptrunc (select cond, R1, Cst)) --> + // (select cond, (fptrunc R1), (fptrunc Cst)) + SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)); + if (SI && + (isa<ConstantFP>(SI->getOperand(1)) || + isa<ConstantFP>(SI->getOperand(2)))) { + Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), + CI.getType()); + Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), + CI.getType()); + return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0)); + if (II) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: { + // (fptrunc (fabs x)) -> (fabs (fptrunc x)) + Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), + CI.getType()); + Type *IntrinsicType[] = { CI.getType() }; + Function *Overload = + Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), + II->getIntrinsicID(), IntrinsicType); + + Value *Args[] = { InnerTrunc }; + return CallInst::Create(Overload, Args, II->getName()); + } + } + } + + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) + // Note that we restrict this transformation based on + // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because + // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the + // single-precision intrinsic can be expanded in the backend. + CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); + if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && + (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) || + Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) && + Call->getNumArgOperands() == 1 && + Call->hasOneUse()) { + CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); + if (Arg && Arg->getOpcode() == Instruction::FPExt && + CI.getType()->isFloatTy() && + Call->getType()->isDoubleTy() && + Arg->getType()->isDoubleTy() && + Arg->getOperand(0)->getType()->isFloatTy()) { + Function *Callee = Call->getCalledFunction(); + Module *M = CI.getParent()->getParent()->getParent(); + Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ? + Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) : + M->getOrInsertFunction("sqrtf", Callee->getAttributes(), + Builder->getFloatTy(), Builder->getFloatTy(), + NULL); + CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), + "sqrtfcall"); + ret->setAttributes(Callee->getAttributes()); + + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. + ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); + EraseInstFromFunction(*Call); + return ret; + } + } + + return 0; +} + +Instruction *InstCombiner::visitFPExt(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { + Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); + if (OpI == 0) + return commonCastTransforms(FI); + + // fptoui(uitofp(X)) --> X + // fptoui(sitofp(X)) --> X + // This is safe if the intermediate type has enough bits in its mantissa to + // accurately represent all values of X. For example, do not do this with + // i64->float->i64. This is also safe for sitofp case, because any negative + // 'X' value would cause an undefined result for the fptoui. + if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && + OpI->getOperand(0)->getType() == FI.getType() && + (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ + OpI->getType()->getFPMantissaWidth()) + return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + + return commonCastTransforms(FI); +} + +Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { + Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); + if (OpI == 0) + return commonCastTransforms(FI); + + // fptosi(sitofp(X)) --> X + // fptosi(uitofp(X)) --> X + // This is safe if the intermediate type has enough bits in its mantissa to + // accurately represent all values of X. For example, do not do this with + // i64->float->i64. This is also safe for sitofp case, because any negative + // 'X' value would cause an undefined result for the fptoui. + if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && + OpI->getOperand(0)->getType() == FI.getType() && + (int)FI.getType()->getScalarSizeInBits() <= + OpI->getType()->getFPMantissaWidth()) + return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + + return commonCastTransforms(FI); +} + +Instruction *InstCombiner::visitUIToFP(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitSIToFP(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { + // If the source integer type is not the intptr_t type for this target, do a + // trunc or zext to the intptr_t type, then inttoptr of it. This allows the + // cast to be exposed to other transforms. + + if (TD) { + unsigned AS = CI.getAddressSpace(); + if (CI.getOperand(0)->getType()->getScalarSizeInBits() != + TD->getPointerSizeInBits(AS)) { + Type *Ty = TD->getIntPtrType(CI.getContext(), AS); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); + } + } + + if (Instruction *I = commonCastTransforms(CI)) + return I; + + return 0; +} + +/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) +Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { + Value *Src = CI.getOperand(0); + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { + // If casting the result of a getelementptr instruction with no offset, turn + // this into a cast of the original pointer! + if (GEP->hasAllZeroIndices()) { + // Changing the cast operand is usually not a good idea but it is safe + // here because the pointer operand is being replaced with another + // pointer operand so the opcode doesn't need to change. + Worklist.Add(GEP); + CI.setOperand(0, GEP->getOperand(0)); + return &CI; + } + + if (!TD) + return commonCastTransforms(CI); + + // If the GEP has a single use, and the base pointer is a bitcast, and the + // GEP computes a constant offset, see if we can convert these three + // instructions into fewer. This typically happens with unions and other + // non-type-safe code. + unsigned AS = GEP->getPointerAddressSpace(); + unsigned OffsetBits = TD->getPointerSizeInBits(AS); + APInt Offset(OffsetBits, 0); + BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); + if (GEP->hasOneUse() && + BCI && + GEP->accumulateConstantOffset(*TD, Offset)) { + // Get the base pointer input of the bitcast, and the type it points to. + Value *OrigBase = BCI->getOperand(0); + SmallVector<Value*, 8> NewIndices; + if (FindElementAtOffset(OrigBase->getType(), + Offset.getSExtValue(), + NewIndices)) { + // If we were able to index down into an element, create the GEP + // and bitcast the result. This eliminates one bitcast, potentially + // two. + Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); + NGEP->takeName(GEP); + + if (isa<BitCastInst>(CI)) + return new BitCastInst(NGEP, CI.getType()); + assert(isa<PtrToIntInst>(CI)); + return new PtrToIntInst(NGEP, CI.getType()); + } + } + } + + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { + // If the destination integer type is not the intptr_t type for this target, + // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast + // to be exposed to other transforms. + + if (!TD) + return commonPointerCastTransforms(CI); + + Type *Ty = CI.getType(); + unsigned AS = CI.getPointerAddressSpace(); + + if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS)) + return commonPointerCastTransforms(CI); + + Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS); + if (Ty->isVectorTy()) // Handle vectors of pointers. + PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); + + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); +} + +/// OptimizeVectorResize - This input value (which is known to have vector type) +/// is being zero extended or truncated to the specified vector type. Try to +/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// +/// The source and destination vector types may have different element types. +static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, + InstCombiner &IC) { + // We can only do this optimization if the output is a multiple of the input + // element size, or the input is a multiple of the output element size. + // Convert the input type to have the same element type as the output. + VectorType *SrcTy = cast<VectorType>(InVal->getType()); + + if (SrcTy->getElementType() != DestTy->getElementType()) { + // The input types don't need to be identical, but for now they must be the + // same size. There is no specific reason we couldn't handle things like + // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten + // there yet. + if (SrcTy->getElementType()->getPrimitiveSizeInBits() != + DestTy->getElementType()->getPrimitiveSizeInBits()) + return 0; + + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); + InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + } + + // Now that the element types match, get the shuffle mask and RHS of the + // shuffle to use, which depends on whether we're increasing or decreasing the + // size of the input. + SmallVector<uint32_t, 16> ShuffleMask; + Value *V2; + + if (SrcTy->getNumElements() > DestTy->getNumElements()) { + // If we're shrinking the number of elements, just shuffle in the low + // elements from the input and use undef as the second shuffle input. + V2 = UndefValue::get(SrcTy); + for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) + ShuffleMask.push_back(i); + + } else { + // If we're increasing the number of elements, shuffle in all of the + // elements from InVal and fill the rest of the result elements with zeros + // from a constant zero. + V2 = Constant::getNullValue(SrcTy); + unsigned SrcElts = SrcTy->getNumElements(); + for (unsigned i = 0, e = SrcElts; i != e; ++i) + ShuffleMask.push_back(i); + + // The excess elements reference the first element of the zero input. + for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) + ShuffleMask.push_back(SrcElts); + } + + return new ShuffleVectorInst(InVal, V2, + ConstantDataVector::get(V2->getContext(), + ShuffleMask)); +} + +static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { + return Value % Ty->getPrimitiveSizeInBits() == 0; +} + +static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { + return Value / Ty->getPrimitiveSizeInBits(); +} + +/// CollectInsertionElements - V is a value which is inserted into a vector of +/// VecEltTy. Look through the value to see if we can decompose it into +/// insertions into the vector. See the example in the comment for +/// OptimizeIntegerToVectorInsertions for the pattern this handles. +/// The type of V is always a non-zero multiple of VecEltTy's size. +/// Shift is the number of bits between the lsb of V and the lsb of +/// the vector. +/// +/// This returns false if the pattern can't be matched or true if it can, +/// filling in Elements with the elements found here. +static bool CollectInsertionElements(Value *V, unsigned Shift, + SmallVectorImpl<Value*> &Elements, + Type *VecEltTy, InstCombiner &IC) { + assert(isMultipleOfTypeSize(Shift, VecEltTy) && + "Shift should be a multiple of the element type size"); + + // Undef values never contribute useful bits to the result. + if (isa<UndefValue>(V)) return true; + + // If we got down to a value of the right type, we win, try inserting into the + // right element. + if (V->getType() == VecEltTy) { + // Inserting null doesn't actually insert any elements. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return true; + + unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy); + if (IC.getDataLayout()->isBigEndian()) + ElementIndex = Elements.size() - ElementIndex - 1; + + // Fail if multiple elements are inserted into this slot. + if (Elements[ElementIndex] != 0) + return false; + + Elements[ElementIndex] = V; + return true; + } + + if (Constant *C = dyn_cast<Constant>(V)) { + // Figure out the # elements this provides, and bitcast it or slice it up + // as required. + unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(), + VecEltTy); + // If the constant is the size of a vector element, we just need to bitcast + // it to the right type so it gets properly inserted. + if (NumElts == 1) + return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + Shift, Elements, VecEltTy, IC); + + // Okay, this is a constant that covers multiple elements. Slice it up into + // pieces and insert each element-sized piece into the vector. + if (!isa<IntegerType>(C->getType())) + C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), + C->getType()->getPrimitiveSizeInBits())); + unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); + Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + + for (unsigned i = 0; i != NumElts; ++i) { + unsigned ShiftI = Shift+i*ElementSize; + Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), + ShiftI)); + Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); + if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC)) + return false; + } + return true; + } + + if (!V->hasOneUse()) return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return false; + switch (I->getOpcode()) { + default: return false; // Unhandled case. + case Instruction::BitCast: + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); + case Instruction::ZExt: + if (!isMultipleOfTypeSize( + I->getOperand(0)->getType()->getPrimitiveSizeInBits(), + VecEltTy)) + return false; + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); + case Instruction::Or: + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC) && + CollectInsertionElements(I->getOperand(1), Shift, + Elements, VecEltTy, IC); + case Instruction::Shl: { + // Must be shifting by a constant that is a multiple of the element size. + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + Shift += CI->getZExtValue(); + if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); + } + + } +} + + +/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we +/// may be doing shifts and ors to assemble the elements of the vector manually. +/// Try to rip the code out and replace it with insertelements. This is to +/// optimize code like this: +/// +/// %tmp37 = bitcast float %inc to i32 +/// %tmp38 = zext i32 %tmp37 to i64 +/// %tmp31 = bitcast float %inc5 to i32 +/// %tmp32 = zext i32 %tmp31 to i64 +/// %tmp33 = shl i64 %tmp32, 32 +/// %ins35 = or i64 %tmp33, %tmp38 +/// %tmp43 = bitcast i64 %ins35 to <2 x float> +/// +/// Into two insertelements that do "buildvector{%inc, %inc5}". +static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, + InstCombiner &IC) { + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + + VectorType *DestVecTy = cast<VectorType>(CI.getType()); + Value *IntInput = CI.getOperand(0); + + SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); + if (!CollectInsertionElements(IntInput, 0, Elements, + DestVecTy->getElementType(), IC)) + return 0; + + // If we succeeded, we know that all of the element are specified by Elements + // or are zero if Elements has a null entry. Recast this as a set of + // insertions. + Value *Result = Constant::getNullValue(CI.getType()); + for (unsigned i = 0, e = Elements.size(); i != e; ++i) { + if (Elements[i] == 0) continue; // Unset element. + + Result = IC.Builder->CreateInsertElement(Result, Elements[i], + IC.Builder->getInt32(i)); + } + + return Result; +} + + +/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double +/// bitcast. The various long double bitcasts can't get in here. +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + + Value *Src = CI.getOperand(0); + Type *DestTy = CI.getType(); + + // If this is a bitcast from int to float, check to see if the int is an + // extraction from a vector. + Value *VecInput = 0; + // bitcast(trunc(bitcast(somevector))) + if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && + isa<VectorType>(VecInput->getType())) { + VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + unsigned Elt = 0; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + } + } + + // bitcast(trunc(lshr(bitcast(somevector), cst)) + ConstantInt *ShAmt = 0; + if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShAmt)))) && + isa<VectorType>(VecInput->getType())) { + VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && + ShAmt->getZExtValue() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + unsigned Elt = ShAmt->getZExtValue() / DestWidth; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + } + } + return 0; +} + +Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { + // If the operands are integer typed then apply the integer transforms, + // otherwise just apply the common ones. + Value *Src = CI.getOperand(0); + Type *SrcTy = Src->getType(); + Type *DestTy = CI.getType(); + + // Get rid of casts from one type to the same type. These are useless and can + // be replaced by the operand. + if (DestTy == Src->getType()) + return ReplaceInstUsesWith(CI, Src); + + if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { + PointerType *SrcPTy = cast<PointerType>(SrcTy); + Type *DstElTy = DstPTy->getElementType(); + Type *SrcElTy = SrcPTy->getElementType(); + + // If the address spaces don't match, don't eliminate the bitcast, which is + // required for changing types. + if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) + return 0; + + // If we are casting a alloca to a pointer to a type of the same + // size, rewrite the allocation instruction to allocate the "right" type. + // There is no need to modify malloc calls because it is their bitcast that + // needs to be cleaned up. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Src)) + if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) + return V; + + // If the source and destination are pointers, and this cast is equivalent + // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. + // This can enhance SROA and other transforms that want type-safe pointers. + Constant *ZeroUInt = + Constant::getNullValue(Type::getInt32Ty(CI.getContext())); + unsigned NumZeros = 0; + while (SrcElTy != DstElTy && + isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && + SrcElTy->getNumContainedTypes() /* not "{}" */) { + SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); + ++NumZeros; + } + + // If we found a path from the src to dest, create the getelementptr now. + if (SrcElTy == DstElTy) { + SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); + return GetElementPtrInst::CreateInBounds(Src, Idxs); + } + } + + // Try to optimize int -> float bitcasts. + if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + return I; + + if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { + if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { + Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + return InsertElementInst::Create(UndefValue::get(DestTy), Elem, + Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); + // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) + } + + if (isa<IntegerType>(SrcTy)) { + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) { + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + cast<VectorType>(DestTy), *this)) + return I; + } + + // If the input is an 'or' instruction, we may be doing shifts and ors to + // assemble the elements of the vector manually. Try to rip the code out + // and replace it with insertelements. + if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this)) + return ReplaceInstUsesWith(CI, V); + } + } + + if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { + if (SrcVTy->getNumElements() == 1) { + // If our destination is not a vector, then make this a straight + // scalar-scalar cast. + if (!DestTy->isVectorTy()) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } + + // Otherwise, see if our source is an insert. If so, then use the scalar + // component directly. + if (InsertElementInst *IEI = + dyn_cast<InsertElementInst>(CI.getOperand(0))) + return CastInst::Create(Instruction::BitCast, IEI->getOperand(1), + DestTy); + } + } + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { + // Okay, we have (bitcast (shuffle ..)). Check to see if this is + // a bitcast to a vector with the same # elts. + if (SVI->hasOneUse() && DestTy->isVectorTy() && + DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && + SVI->getType()->getNumElements() == + SVI->getOperand(0)->getType()->getVectorNumElements()) { + BitCastInst *Tmp; + // If either of the operands is a cast from CI.getType(), then + // evaluating the shuffle in the casted destination's type will allow + // us to eliminate at least one cast. + if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && + Tmp->getOperand(0)->getType() == DestTy) || + ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && + Tmp->getOperand(0)->getType() == DestTy)) { + Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); + // Return a new shuffle vector. Use the same element ID's, as we + // know the vector types match #elts. + return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); + } + } + } + + if (SrcTy->isPointerTy()) + return commonPointerCastTransforms(CI); + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { + return commonCastTransforms(CI); +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp new file mode 100644 index 000000000000..9bb65efbd619 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -0,0 +1,3413 @@ +//===- InstCombineCompares.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitICmp and visitFCmp functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" +using namespace llvm; +using namespace PatternMatch; + +static ConstantInt *getOne(Constant *C) { + return ConstantInt::get(cast<IntegerType>(C->getType()), 1); +} + +/// AddOne - Add one to a ConstantInt +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} +/// SubOne - Subtract one from a ConstantInt +static Constant *SubOne(Constant *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +} + +static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { + return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); +} + +static bool HasAddOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { + if (!IsSigned) + return Result->getValue().ult(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().sgt(In1->getValue()); + return Result->getValue().slt(In1->getValue()); +} + +/// AddWithOverflow - Compute Result = In1+In2, returning true if the result +/// overflowed for this type. +static bool AddWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getAdd(In1, In2); + + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); + if (HasAddOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasAddOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} + +static bool HasSubOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { + if (!IsSigned) + return Result->getValue().ugt(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().slt(In1->getValue()); + + return Result->getValue().sgt(In1->getValue()); +} + +/// SubWithOverflow - Compute Result = In1-In2, returning true if the result +/// overflowed for this type. +static bool SubWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getSub(In1, In2); + + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); + if (HasSubOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasSubOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} + +/// isSignBitCheck - Given an exploded icmp instruction, return true if the +/// comparison only checks the sign bit. If it only checks the sign bit, set +/// TrueIfSigned if the result of the comparison is true when the input value is +/// signed. +static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, + bool &TrueIfSigned) { + switch (pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS->isZero(); + case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 + TrueIfSigned = true; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == high-bit-mask - 1 + TrueIfSigned = true; + return RHS->isMaxValue(true); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS->getValue().isSignBit(); + default: + return false; + } +} + +/// Returns true if the exploded icmp can be expressed as a signed comparison +/// to zero and updates the predicate accordingly. +/// The signedness of the comparison is preserved. +static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { + if (!ICmpInst::isSigned(pred)) + return false; + + if (RHS->isZero()) + return ICmpInst::isRelational(pred); + + if (RHS->isOne()) { + if (pred == ICmpInst::ICMP_SLT) { + pred = ICmpInst::ICMP_SLE; + return true; + } + } else if (RHS->isAllOnesValue()) { + if (pred == ICmpInst::ICMP_SGT) { + pred = ICmpInst::ICMP_SGE; + return true; + } + } + + return false; +} + +// isHighOnes - Return true if the constant is of the form 1+0+. +// This is the same as lowones(~X). +static bool isHighOnes(const ConstantInt *CI) { + return (~CI->getValue() + 1).isPowerOf2(); +} + +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// set of known zero and one bits, compute the maximum and minimum values that +/// could have the specified known zero and known one bits, returning them in +/// min/max. +static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, + const APInt& KnownOne, + APInt& Min, APInt& Max) { + assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && + KnownZero.getBitWidth() == Min.getBitWidth() && + KnownZero.getBitWidth() == Max.getBitWidth() && + "KnownZero, KnownOne and Min, Max must have equal bitwidth."); + APInt UnknownBits = ~(KnownZero|KnownOne); + + // The minimum value is when all unknown bits are zeros, EXCEPT for the sign + // bit if it is unknown. + Min = KnownOne; + Max = KnownOne|UnknownBits; + + if (UnknownBits.isNegative()) { // Sign bit is unknown + Min.setBit(Min.getBitWidth()-1); + Max.clearBit(Max.getBitWidth()-1); + } +} + +// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and +// a set of known zero and one bits, compute the maximum and minimum values that +// could have the specified known zero and known one bits, returning them in +// min/max. +static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, + const APInt &KnownOne, + APInt &Min, APInt &Max) { + assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && + KnownZero.getBitWidth() == Min.getBitWidth() && + KnownZero.getBitWidth() == Max.getBitWidth() && + "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); + APInt UnknownBits = ~(KnownZero|KnownOne); + + // The minimum value is when the unknown bits are all zeros. + Min = KnownOne; + // The maximum value is when the unknown bits are all ones. + Max = KnownOne|UnknownBits; +} + + + +/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern: +/// cmp pred (load (gep GV, ...)), cmpcst +/// where GV is a global variable with a constant initializer. Try to simplify +/// this into some simple computation that does not need the load. For example +/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3". +/// +/// If AndCst is non-null, then the loaded value is masked with that constant +/// before doing the comparison. This handles cases like "A[i]&4 == 0". +Instruction *InstCombiner:: +FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, + CmpInst &ICI, ConstantInt *AndCst) { + // We need TD information to know the pointer size unless this is inbounds. + if (!GEP->isInBounds() && TD == 0) + return 0; + + Constant *Init = GV->getInitializer(); + if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) + return 0; + + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. + + // There are many forms of this optimization we can handle, for now, just do + // the simple index into a single-dimensional array. + // + // Require: GEP GV, 0, i {{, constant indices}} + if (GEP->getNumOperands() < 3 || + !isa<ConstantInt>(GEP->getOperand(1)) || + !cast<ConstantInt>(GEP->getOperand(1))->isZero() || + isa<Constant>(GEP->getOperand(2))) + return 0; + + // Check that indices after the variable are constants and in-range for the + // type they index. Collect the indices. This is typically for arrays of + // structs. + SmallVector<unsigned, 4> LaterIndices; + + Type *EltTy = Init->getType()->getArrayElementType(); + for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { + ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (Idx == 0) return 0; // Variable index. + + uint64_t IdxVal = Idx->getZExtValue(); + if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. + + if (StructType *STy = dyn_cast<StructType>(EltTy)) + EltTy = STy->getElementType(IdxVal); + else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { + if (IdxVal >= ATy->getNumElements()) return 0; + EltTy = ATy->getElementType(); + } else { + return 0; // Unknown type. + } + + LaterIndices.push_back(IdxVal); + } + + enum { Overdefined = -3, Undefined = -2 }; + + // Variables for our state machines. + + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form + // "i == 47 | i == 87", where 47 is the first index the condition is true for, + // and 87 is the second (and last) index. FirstTrueElement is -2 when + // undefined, otherwise set to the first true element. SecondTrueElement is + // -2 when undefined, -3 when overdefined and >= 0 when that index is true. + int FirstTrueElement = Undefined, SecondTrueElement = Undefined; + + // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the + // form "i != 47 & i != 87". Same state transitions as for true elements. + int FirstFalseElement = Undefined, SecondFalseElement = Undefined; + + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these + /// define a state machine that triggers for ranges of values that the index + /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. + /// This is -2 when undefined, -3 when overdefined, and otherwise the last + /// index in the range (inclusive). We use -2 for undefined here because we + /// use relative comparisons and don't want 0-1 to match -1. + int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; + + // MagicBitvector - This is a magic bitvector where we set a bit if the + // comparison is true for element 'i'. If there are 64 elements or less in + // the array, this will fully represent all the comparison results. + uint64_t MagicBitvector = 0; + + + // Scan the array and see if one of our patterns matches. + Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); + for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { + Constant *Elt = Init->getAggregateElement(i); + if (Elt == 0) return 0; + + // If this is indexing an array of structures, get the structure element. + if (!LaterIndices.empty()) + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); + + // If the element is masked, handle it. + if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); + + // Find out if the comparison would be true or false for the i'th element. + Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, + CompareRHS, TD, TLI); + // If the result is undef for this element, ignore it. + if (isa<UndefValue>(C)) { + // Extend range state machines to cover this element in case there is an + // undef in the middle of the range. + if (TrueRangeEnd == (int)i-1) + TrueRangeEnd = i; + if (FalseRangeEnd == (int)i-1) + FalseRangeEnd = i; + continue; + } + + // If we can't compute the result for any of the elements, we have to give + // up evaluating the entire conditional. + if (!isa<ConstantInt>(C)) return 0; + + // Otherwise, we know if the comparison is true or false for this element, + // update our state machines. + bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); + + // State machine for single/double/range index comparison. + if (IsTrueForElt) { + // Update the TrueElement state machine. + if (FirstTrueElement == Undefined) + FirstTrueElement = TrueRangeEnd = i; // First true element. + else { + // Update double-compare state machine. + if (SecondTrueElement == Undefined) + SecondTrueElement = i; + else + SecondTrueElement = Overdefined; + + // Update range state machine. + if (TrueRangeEnd == (int)i-1) + TrueRangeEnd = i; + else + TrueRangeEnd = Overdefined; + } + } else { + // Update the FalseElement state machine. + if (FirstFalseElement == Undefined) + FirstFalseElement = FalseRangeEnd = i; // First false element. + else { + // Update double-compare state machine. + if (SecondFalseElement == Undefined) + SecondFalseElement = i; + else + SecondFalseElement = Overdefined; + + // Update range state machine. + if (FalseRangeEnd == (int)i-1) + FalseRangeEnd = i; + else + FalseRangeEnd = Overdefined; + } + } + + + // If this element is in range, update our magic bitvector. + if (i < 64 && IsTrueForElt) + MagicBitvector |= 1ULL << i; + + // If all of our states become overdefined, bail out early. Since the + // predicate is expensive, only check it every 8 elements. This is only + // really useful for really huge arrays. + if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && + SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && + FalseRangeEnd == Overdefined) + return 0; + } + + // Now that we've scanned the entire array, emit our new comparison(s). We + // order the state machines in complexity of the generated code. + Value *Idx = GEP->getOperand(2); + + // If the index is larger than the pointer size of the target, truncate the + // index down like the GEP would do implicitly. We don't have to do this for + // an inbounds GEP because the index can't be out of range. + if (!GEP->isInBounds()) { + Type *IntPtrTy = TD->getIntPtrType(GEP->getType()); + unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); + if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) + Idx = Builder->CreateTrunc(Idx, IntPtrTy); + } + + // If the comparison is only true for one or two elements, emit direct + // comparisons. + if (SecondTrueElement != Overdefined) { + // None true -> false. + if (FirstTrueElement == Undefined) + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); + + // True for one element -> 'i == 47'. + if (SecondTrueElement == Undefined) + return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); + + // True for two elements -> 'i == 47 | i == 72'. + Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); + Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); + Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); + return BinaryOperator::CreateOr(C1, C2); + } + + // If the comparison is only false for one or two elements, emit direct + // comparisons. + if (SecondFalseElement != Overdefined) { + // None false -> true. + if (FirstFalseElement == Undefined) + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); + + // False for one element -> 'i != 47'. + if (SecondFalseElement == Undefined) + return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); + + // False for two elements -> 'i != 47 & i != 72'. + Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); + Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); + Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); + return BinaryOperator::CreateAnd(C1, C2); + } + + // If the comparison can be replaced with a range comparison for the elements + // where it is true, emit the range check. + if (TrueRangeEnd != Overdefined) { + assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); + + // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). + if (FirstTrueElement) { + Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); + Idx = Builder->CreateAdd(Idx, Offs); + } + + Value *End = ConstantInt::get(Idx->getType(), + TrueRangeEnd-FirstTrueElement+1); + return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); + } + + // False range check. + if (FalseRangeEnd != Overdefined) { + assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); + // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). + if (FirstFalseElement) { + Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); + Idx = Builder->CreateAdd(Idx, Offs); + } + + Value *End = ConstantInt::get(Idx->getType(), + FalseRangeEnd-FirstFalseElement); + return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); + } + + + // If a magic bitvector captures the entire comparison state + // of this load, replace it with computation that does: + // ((magic_cst >> i) & 1) != 0 + { + Type *Ty = 0; + + // Look for an appropriate type: + // - The type of Idx if the magic fits + // - The smallest fitting legal type if we have a DataLayout + // - Default to i32 + if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) + Ty = Idx->getType(); + else if (TD) + Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + else if (ArrayElementCount <= 32) + Ty = Type::getInt32Ty(Init->getContext()); + + if (Ty != 0) { + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } + } + + return 0; +} + + +/// EvaluateGEPOffsetExpression - Return a value that can be used to compare +/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we +/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can +/// be complex, and scales are involved. The above expression would also be +/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). +/// This later form is less amenable to optimization though, and we are allowed +/// to generate the first by knowing that pointer arithmetic doesn't overflow. +/// +/// If we can't emit an optimized form for this expression, this returns null. +/// +static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { + DataLayout &TD = *IC.getDataLayout(); + gep_type_iterator GTI = gep_type_begin(GEP); + + // Check to see if this gep only has a single variable index. If so, and if + // any constant indices are a multiple of its scale, then we can compute this + // in terms of the scale of the variable index. For example, if the GEP + // implies an offset of "12 + i*4", then we can codegen this as "3 + i", + // because the expression will cross zero at the same point. + unsigned i, e = GEP->getNumOperands(); + int64_t Offset = 0; + for (i = 1; i != e; ++i, ++GTI) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { + // Compute the aggregate offset of constant indices. + if (CI->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size*CI->getSExtValue(); + } + } else { + // Found our variable index. + break; + } + } + + // If there are no variable indices, we must have a constant offset, just + // evaluate it the general way. + if (i == e) return 0; + + Value *VariableIdx = GEP->getOperand(i); + // Determine the scale factor of the variable element. For example, this is + // 4 if the variable index is into an array of i32. + uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); + + // Verify that there are no other variable indices. If so, emit the hard way. + for (++i, ++GTI; i != e; ++i, ++GTI) { + ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!CI) return 0; + + // Compute the aggregate offset of constant indices. + if (CI->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size*CI->getSExtValue(); + } + } + + + + // Okay, we know we have a single variable index, which must be a + // pointer/array/vector index. If there is no offset, life is simple, return + // the index. + Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType()); + unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); + if (Offset == 0) { + // Cast to intptrty in case a truncation occurs. If an extension is needed, + // we don't need to bother extending: the extension won't affect where the + // computation crosses zero. + if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { + VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + } + return VariableIdx; + } + + // Otherwise, there is an index. The computation we will do will be modulo + // the pointer size, so get it. + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + + Offset &= PtrSizeMask; + VariableScale &= PtrSizeMask; + + // To do this transformation, any constant index must be a multiple of the + // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", + // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a + // multiple of the variable scale. + int64_t NewOffs = Offset / (int64_t)VariableScale; + if (Offset != NewOffs*(int64_t)VariableScale) + return 0; + + // Okay, we can do this evaluation. Start by converting the index to intptr. + if (VariableIdx->getType() != IntPtrTy) + VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + true /*Signed*/); + Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); + return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); +} + +/// FoldGEPICmp - Fold comparisons between a GEP instruction and something +/// else. At this point we know that the GEP is on the LHS of the comparison. +Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, + Instruction &I) { + // Don't transform signed compares of GEPs into index compares. Even if the + // GEP is inbounds, the final add of the base pointer can have signed overflow + // and would change the result of the icmp. + // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be + // the maximum signed value for the pointer type. + if (ICmpInst::isSigned(Cond)) + return 0; + + // Look through bitcasts. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) + RHS = BCI->getOperand(0); + + Value *PtrBase = GEPLHS->getOperand(0); + if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { + // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). + // This transformation (ignoring the base and scales) is valid because we + // know pointers can't overflow since the gep is inbounds. See if we can + // output an optimized form. + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); + + // If not, synthesize the offset the hard way. + if (Offset == 0) + Offset = EmitGEPOffset(GEPLHS); + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, + Constant::getNullValue(Offset->getType())); + } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { + // If the base pointers are different, but the indices are the same, just + // compare the base pointer. + if (PtrBase != GEPRHS->getOperand(0)) { + bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); + IndicesTheSame &= GEPLHS->getOperand(0)->getType() == + GEPRHS->getOperand(0)->getType(); + if (IndicesTheSame) + for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) + if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { + IndicesTheSame = false; + break; + } + + // If all indices are the same, just compare the base pointers. + if (IndicesTheSame) + return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0)); + + // If we're comparing GEPs with two base pointers that only differ in type + // and both GEPs have only constant indices or just one use, then fold + // the compare with the adjusted indices. + if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() && + (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && + (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && + PtrBase->stripPointerCasts() == + GEPRHS->getOperand(0)->stripPointerCasts()) { + Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), + EmitGEPOffset(GEPLHS), + EmitGEPOffset(GEPRHS)); + return ReplaceInstUsesWith(I, Cmp); + } + + // Otherwise, the base pointers are different and the indices are + // different, bail out. + return 0; + } + + // If one of the GEPs has all zero indices, recurse. + bool AllZeros = true; + for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) + if (!isa<Constant>(GEPLHS->getOperand(i)) || + !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) { + AllZeros = false; + break; + } + if (AllZeros) + return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), + ICmpInst::getSwappedPredicate(Cond), I); + + // If the other GEP has all zero indices, recurse. + AllZeros = true; + for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) + if (!isa<Constant>(GEPRHS->getOperand(i)) || + !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) { + AllZeros = false; + break; + } + if (AllZeros) + return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); + + bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); + if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { + // If the GEPs only differ by one index, compare it. + unsigned NumDifferences = 0; // Keep track of # differences. + unsigned DiffOperand = 0; // The operand that differs. + for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) + if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { + if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != + GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { + // Irreconcilable differences. + NumDifferences = 2; + break; + } else { + if (NumDifferences++) break; + DiffOperand = i; + } + } + + if (NumDifferences == 0) // SAME GEP? + return ReplaceInstUsesWith(I, // No comparison is needed here. + Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); + + else if (NumDifferences == 1 && GEPsInBounds) { + Value *LHSV = GEPLHS->getOperand(DiffOperand); + Value *RHSV = GEPRHS->getOperand(DiffOperand); + // Make sure we do a signed comparison here. + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); + } + } + + // Only lower this if the icmp is the only user of the GEP or if we expect + // the result to fold to a constant! + if (TD && + GEPsInBounds && + (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && + (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { + // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) + Value *L = EmitGEPOffset(GEPLHS); + Value *R = EmitGEPOffset(GEPRHS); + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); + } + } + return 0; +} + +/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". +Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, + Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred) { + // If we have X+0, exit early (simplifying logic below) and let it get folded + // elsewhere. icmp X+0, X -> icmp X, X + if (CI->isZero()) { + bool isTrue = ICmpInst::isTrueWhenEqual(Pred); + return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); + } + + // (X+4) == X -> false. + if (Pred == ICmpInst::ICMP_EQ) + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + + // (X+4) != X -> true. + if (Pred == ICmpInst::ICMP_NE) + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + + // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, + // so the values can never be equal. Similarly for all other "or equals" + // operators. + + // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 + // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 + // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { + Value *R = + ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); + return new ICmpInst(ICmpInst::ICMP_UGT, X, R); + } + + // (X+1) >u X --> X <u (0-1) --> X != 255 + // (X+2) >u X --> X <u (0-2) --> X <u 254 + // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) + return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); + + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); + ConstantInt *SMax = ConstantInt::get(X->getContext(), + APInt::getSignedMaxValue(BitWidth)); + + // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 + // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 + // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 + // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 + // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 + // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) + return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); + + // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 + // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 + // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 + // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 + // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 + // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 + + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); + Constant *C = Builder->getInt(CI->getValue()-1); + return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); +} + +/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS +/// and CmpRHS are both known to be integer constants. +Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS) { + ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); + const APInt &CmpRHSV = CmpRHS->getValue(); + + // FIXME: If the operand types don't match the type of the divide + // then don't attempt this transform. The code below doesn't have the + // logic to deal with a signed divide and an unsigned compare (and + // vice versa). This is because (x /s C1) <s C2 produces different + // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even + // (x /u C1) <u C2. Simply casting the operands and result won't + // work. :( The if statement below tests that condition and bails + // if it finds it. + bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; + if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) + return 0; + if (DivRHS->isZero()) + return 0; // The ProdOV computation fails on divide by zero. + if (DivIsSigned && DivRHS->isAllOnesValue()) + return 0; // The overflow computation also screws up here + if (DivRHS->isOne()) { + // This eliminates some funny cases with INT_MIN. + ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. + return &ICI; + } + + // Compute Prod = CI * DivRHS. We are essentially solving an equation + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); + + // Determine if the product overflows by seeing if the product is + // not equal to the divide. Make sure we do the same kind of divide + // as in the LHS instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + + // Get the ICmp opcode + ICmpInst::Predicate Pred = ICI.getPredicate(); + + /// If the division is known to be exact, then there is no remainder from the + /// divide, so the covered range size is unit, otherwise it is the divisor. + ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; + + // Figure out the interval that is being checked. For example, a comparison + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // Compute this interval based on the constants involved and the signedness of + // the compare/divide. This computes a half-open interval, keeping track of + // whether either value in the interval overflows. After analysis each + // overflow variable is set to 0 if it's corresponding bound variable is valid + // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. + int LoOverflow = 0, HiOverflow = 0; + Constant *LoBound = 0, *HiBound = 0; + + if (!DivIsSigned) { // udiv + // e.g. X/5 op 3 --> [15, 20) + LoBound = Prod; + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) { + // If this is not an exact divide, then many values in the range collapse + // to the same result value. + HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); + } + + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. + if (CmpRHSV == 0) { // (X / pos) op 0 + // Can't overflow. e.g. X/2 op 0 --> [-1, 2) + LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + HiBound = RangeSize; + } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos + LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); + } else { // (X / pos) op neg + // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) + HiBound = AddOne(Prod); + LoOverflow = HiOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) { + ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); + LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; + } + } + } else if (DivRHS->isNegative()) { // Divisor is < 0. + if (DivI->isExact()) + RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); + if (CmpRHSV == 0) { // (X / neg) op 0 + // e.g. X/-5 op 0 --> [-4, 5) + LoBound = AddOne(RangeSize); + HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); + if (HiBound == DivRHS) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + } + } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos + // e.g. X/-5 op 3 --> [-19, -14) + HiBound = AddOne(Prod); + HiOverflow = LoOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) + LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; + } else { // (X / neg) op neg + LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) + LoOverflow = HiOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); + } + + // Dividing by a negative swaps the condition. LT <-> GT + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + Value *X = DivI->getOperand(0); + switch (Pred) { + default: llvm_unreachable("Unhandled icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, LoBound); + if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, HiBound); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, true)); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, LoBound); + if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, HiBound); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, false)); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow == +1) // Low bound is greater than input range. + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + if (LoOverflow == -1) // Low bound is less than input range. + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + return new ICmpInst(Pred, X, LoBound); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow == +1) // High bound greater than input range. + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + if (HiOverflow == -1) // High bound less than input range. + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + +/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". +Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, + ConstantInt *ShAmt) { + const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = CmpRHSV.getBitWidth(); + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + if (ShAmtVal >= TypeBits || ShAmtVal == 0) + return 0; + + if (!ICI.isEquality()) { + // If we have an unsigned comparison and an ashr, we can't simplify this. + // Similarly for signed comparisons with lshr. + if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) + return 0; + + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv + // by a power of 2. Since we already have logic to simplify these, + // transform to div and then simplify the resultant comparison. + if (Shr->getOpcode() == Instruction::AShr && + (!Shr->isExact() || ShAmtVal == TypeBits - 1)) + return 0; + + // Revisit the shift (to delete it). + Worklist.Add(Shr); + + Constant *DivCst = + ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); + + Value *Tmp = + Shr->getOpcode() == Instruction::AShr ? + Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : + Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); + + ICI.setOperand(0, Tmp); + + // If the builder folded the binop, just return it. + BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); + if (TheDiv == 0) + return &ICI; + + // Otherwise, fold this div/compare. + assert(TheDiv->getOpcode() == Instruction::SDiv || + TheDiv->getOpcode() == Instruction::UDiv); + + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); + assert(Res && "This div/cst should have folded!"); + return Res; + } + + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = CmpRHSV << ShAmtVal; + ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp); + if (Shr->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = Builder->getInt1(IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (Shr->hasOneUse() && Shr->isExact()) + return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); + + if (Shr->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = Builder->getInt(Val); + + Value *And = Builder->CreateAnd(Shr->getOperand(0), + Mask, Shr->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); + } + return 0; +} + + +/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". +/// +Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHSI, + ConstantInt *RHS) { + const APInt &RHSV = RHS->getValue(); + + switch (LHSI->getOpcode()) { + case Instruction::Trunc: + if (ICI.isEquality() && LHSI->hasOneUse()) { + // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all + // of the high bits truncated out of x are known. + unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), + SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); + APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); + ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); + + // If all the high bits are known, we can do this xform. + if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { + // Pull in the high bits from known-ones set. + APInt NewRHS = RHS->getValue().zext(SrcBits); + NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + Builder->getInt(NewRHS)); + } + } + break; + + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) + if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + // If this is a comparison that tests the signbit (X < 0) or (x > -1), + // fold the xor. + if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || + (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { + Value *CompareVal = LHSI->getOperand(0); + + // If the sign bit of the XorCST is not set, there is no change to + // the operation, just stop using the Xor. + if (!XorCST->isNegative()) { + ICI.setOperand(0, CompareVal); + Worklist.Add(LHSI); + return &ICI; + } + + // Was the old condition true if the operand is positive? + bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; + + // If so, the new one isn't. + isTrueIfPositive ^= true; + + if (isTrueIfPositive) + return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, + SubOne(RHS)); + else + return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, + AddOne(RHS)); + } + + if (LHSI->hasOneUse()) { + // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) + if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { + const APInt &SignBit = XorCST->getValue(); + ICmpInst::Predicate Pred = ICI.isSigned() + ? ICI.getUnsignedPredicate() + : ICI.getSignedPredicate(); + return new ICmpInst(Pred, LHSI->getOperand(0), + Builder->getInt(RHSV ^ SignBit)); + } + + // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) + if (!ICI.isEquality() && XorCST->isMaxValue(true)) { + const APInt &NotSignBit = XorCST->getValue(); + ICmpInst::Predicate Pred = ICI.isSigned() + ? ICI.getUnsignedPredicate() + : ICI.getSignedPredicate(); + Pred = ICI.getSwappedPredicate(Pred); + return new ICmpInst(Pred, LHSI->getOperand(0), + Builder->getInt(RHSV ^ NotSignBit)); + } + } + + // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C) + // iff -C is a power of 2 + if (ICI.getPredicate() == ICmpInst::ICMP_UGT && + XorCST->getValue() == ~RHSV && (RHSV + 1).isPowerOf2()) + return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCST); + + // (icmp ult (xor X, C), -C) -> (icmp uge X, C) + // iff -C is a power of 2 + if (ICI.getPredicate() == ICmpInst::ICMP_ULT && + XorCST->getValue() == -RHSV && RHSV.isPowerOf2()) + return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCST); + } + break; + case Instruction::And: // (icmp pred (and X, AndCST), RHS) + if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && + LHSI->getOperand(0)->hasOneUse()) { + ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); + + // If the LHS is an AND of a truncating cast, we can widen the + // and/compare to be the input width without changing the value + // produced, eliminating a cast. + if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { + // We can do this transformation if either the AND constant does not + // have its sign bit set or if it is an equality comparison. + // Extending a relational comparison when we're checking the sign + // bit would not work. + if (ICI.isEquality() || + (!AndCST->isNegative() && RHSV.isNonNegative())) { + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); + NewAnd->takeName(LHSI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantExpr::getZExt(RHS, Cast->getSrcTy())); + } + } + + // If the LHS is an AND of a zext, and we have an equality compare, we can + // shrink the and/compare to the smaller type, eliminating the cast. + if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { + IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); + // Make sure we don't compare the upper bits, SimplifyDemandedBits + // should fold the icmp to true/false in that case. + if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantExpr::getTrunc(AndCST, Ty)); + NewAnd->takeName(LHSI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantExpr::getTrunc(RHS, Ty)); + } + } + + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare + // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This + // happens a LOT in code produced by the C front-end, for bitfield + // access. + BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); + if (Shift && !Shift->isShift()) + Shift = 0; + + ConstantInt *ShAmt; + ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; + Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + Type *AndTy = AndCST->getType(); // Type of the and. + + // We can fold this as long as we can't shift unknown bits + // into the mask. This can happen with signed shift + // rights, as they sign-extend. With logical shifts, + // we must still make sure the comparison is not signed + // because we are effectively changing the + // position of the sign bit (PR17827). + // TODO: We can relax these constraints a bit more. + if (ShAmt) { + bool CanFold = false; + unsigned ShiftOpcode = Shift->getOpcode(); + if (ShiftOpcode == Instruction::AShr) { + // To test for the bad case of the signed shr, see if any + // of the bits shifted in could be tested after the mask. + uint32_t TyBits = Ty->getPrimitiveSizeInBits(); + int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); + + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + AndCST->getValue()) == 0) + CanFold = true; + } else if (ShiftOpcode == Instruction::Shl || + ShiftOpcode == Instruction::LShr) { + CanFold = !ICI.isSigned(); + } + + if (CanFold) { + Constant *NewCst; + if (Shift->getOpcode() == Instruction::Shl) + NewCst = ConstantExpr::getLShr(RHS, ShAmt); + else + NewCst = ConstantExpr::getShl(RHS, ShAmt); + + // Check to see if we are shifting out any of the bits being + // compared. + if (ConstantExpr::get(Shift->getOpcode(), + NewCst, ShAmt) != RHS) { + // If we shifted bits out, the fold is not going to work out. + // As a special case, check to see if this means that the + // result is always true or false now. + if (ICI.getPredicate() == ICmpInst::ICMP_EQ) + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + if (ICI.getPredicate() == ICmpInst::ICMP_NE) + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + } else { + ICI.setOperand(1, NewCst); + Constant *NewAndCST; + if (Shift->getOpcode() == Instruction::Shl) + NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); + else + NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); + LHSI->setOperand(1, NewAndCST); + LHSI->setOperand(0, Shift->getOperand(0)); + Worklist.Add(Shift); // Shift is dead. + return &ICI; + } + } + } + + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is + // preferable because it allows the C<<Y expression to be hoisted out + // of a loop if Y is invariant and X is not. + if (Shift && Shift->hasOneUse() && RHSV == 0 && + ICI.isEquality() && !Shift->isArithmeticShift() && + !isa<Constant>(Shift->getOperand(0))) { + // Compute C << Y. + Value *NS; + if (Shift->getOpcode() == Instruction::LShr) { + NS = Builder->CreateShl(AndCST, Shift->getOperand(1)); + } else { + // Insert a logical shift. + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); + } + + // Compute X & (C << Y). + Value *NewAnd = + Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); + + ICI.setOperand(0, NewAnd); + return &ICI; + } + + // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any + // bit set in (X & AndCST) will produce a result greater than RHSV. + if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { + unsigned NTZ = AndCST->getValue().countTrailingZeros(); + if ((NTZ < AndCST->getBitWidth()) && + APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV)) + return new ICmpInst(ICmpInst::ICMP_NE, LHSI, + Constant::getNullValue(RHS->getType())); + } + } + + // Try to optimize things like "A[i]&42 == 0" to index computations. + if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LI->getOperand(0))) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) { + ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1)); + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C)) + return Res; + } + } + + // X & -C == -C -> X > u ~C + // X & -C != -C -> X <= u ~C + // iff C is a power of 2 + if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2()) + return new ICmpInst( + ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULE, + LHSI->getOperand(0), SubOne(RHS)); + break; + + case Instruction::Or: { + if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) + break; + Value *P, *Q; + if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { + // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 + // -> and (icmp eq P, null), (icmp eq Q, null). + Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, + Constant::getNullValue(P->getType())); + Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, + Constant::getNullValue(Q->getType())); + Instruction *Op; + if (ICI.getPredicate() == ICmpInst::ICMP_EQ) + Op = BinaryOperator::CreateAnd(ICIP, ICIQ); + else + Op = BinaryOperator::CreateOr(ICIP, ICIQ); + return Op; + } + break; + } + + case Instruction::Mul: { // (icmp pred (mul X, Val), CI) + ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!Val) break; + + // If this is a signed comparison to 0 and the mul is sign preserving, + // use the mul LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && !Val->isZero() && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(Val->isNegative() ? + ICmpInst::getSwappedPredicate(pred) : pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + + break; + } + + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) + uint32_t TypeBits = RHSV.getBitWidth(); + ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!ShAmt) { + Value *X; + // (1 << X) pred P2 -> X pred Log2(P2) + if (match(LHSI, m_Shl(m_One(), m_Value(X)))) { + bool RHSVIsPowerOf2 = RHSV.isPowerOf2(); + ICmpInst::Predicate Pred = ICI.getPredicate(); + if (ICI.isUnsigned()) { + if (!RHSVIsPowerOf2) { + // (1 << X) < 30 -> X <= 4 + // (1 << X) <= 30 -> X <= 4 + // (1 << X) >= 30 -> X > 4 + // (1 << X) > 30 -> X > 4 + if (Pred == ICmpInst::ICMP_ULT) + Pred = ICmpInst::ICMP_ULE; + else if (Pred == ICmpInst::ICMP_UGE) + Pred = ICmpInst::ICMP_UGT; + } + unsigned RHSLog2 = RHSV.logBase2(); + + // (1 << X) >= 2147483648 -> X >= 31 -> X == 31 + // (1 << X) > 2147483648 -> X > 31 -> false + // (1 << X) <= 2147483648 -> X <= 31 -> true + // (1 << X) < 2147483648 -> X < 31 -> X != 31 + if (RHSLog2 == TypeBits-1) { + if (Pred == ICmpInst::ICMP_UGE) + Pred = ICmpInst::ICMP_EQ; + else if (Pred == ICmpInst::ICMP_UGT) + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + else if (Pred == ICmpInst::ICMP_ULE) + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + else if (Pred == ICmpInst::ICMP_ULT) + Pred = ICmpInst::ICMP_NE; + } + + return new ICmpInst(Pred, X, + ConstantInt::get(RHS->getType(), RHSLog2)); + } else if (ICI.isSigned()) { + if (RHSV.isAllOnesValue()) { + // (1 << X) <= -1 -> X == 31 + if (Pred == ICmpInst::ICMP_SLE) + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(RHS->getType(), TypeBits-1)); + + // (1 << X) > -1 -> X != 31 + if (Pred == ICmpInst::ICMP_SGT) + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(RHS->getType(), TypeBits-1)); + } else if (!RHSV) { + // (1 << X) < 0 -> X == 31 + // (1 << X) <= 0 -> X == 31 + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(RHS->getType(), TypeBits-1)); + + // (1 << X) >= 0 -> X != 31 + // (1 << X) > 0 -> X != 31 + if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(RHS->getType(), TypeBits-1)); + } + } else if (ICI.isEquality()) { + if (RHSVIsPowerOf2) + return new ICmpInst( + Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2())); + + return ReplaceInstUsesWith( + ICI, Pred == ICmpInst::ICMP_EQ ? Builder->getFalse() + : Builder->getTrue()); + } + } + break; + } + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + if (ShAmt->uge(TypeBits)) + break; + + if (ICI.isEquality()) { + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + Constant *Comp = + ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), + ShAmt); + if (Comp != RHS) {// Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = Builder->getInt1(IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // If the shift is NUW, then it is just shifting out zeros, no need for an + // AND. + if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + + // If the shift is NSW and we compare to 0, then it is just shifting out + // sign bits, no need for an AND either. + if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + + if (LHSI->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits, + TypeBits - ShAmtVal)); + + Value *And = + Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, + ConstantExpr::getLShr(RHS, ShAmt)); + } + } + + // If this is a signed comparison to 0 and the shift is sign preserving, + // use the shift LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. + bool TrueIfSigned = false; + if (LHSI->hasOneUse() && + isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { + // (X << 31) <s 0 --> (X&1) != 0 + Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), + APInt::getOneBitSet(TypeBits, + TypeBits-ShAmt->getZExtValue()-1)); + Value *And = + Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); + return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, + And, Constant::getNullValue(And->getType())); + } + + // Transform (icmp pred iM (shl iM %v, N), CI) + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. + // This enables to get rid of the shift in favor of a trunc which can be + // free on the target. It has the additional benefit of comparing to a + // smaller constant, which will be target friendly. + unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); + Constant *NCI = ConstantExpr::getTrunc( + ConstantExpr::getAShr(RHS, + ConstantInt::get(RHS->getType(), Amt)), + NTy); + return new ICmpInst(ICI.getPredicate(), + Builder->CreateTrunc(LHSI->getOperand(0), NTy), + NCI); + } + + break; + } + + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) + case Instruction::AShr: { + // Handle equality comparisons of shift-by-constant. + BinaryOperator *BO = cast<BinaryOperator>(LHSI); + if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) + return Res; + } + + // Handle exact shr's. + if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); + } + break; + } + + case Instruction::SDiv: + case Instruction::UDiv: + // Fold: icmp pred ([us]div X, C1), C2 -> range test + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. If there is an overflow on the low or high side, remember + // it, otherwise compute the range [low, hi) bounding the new value. + // See: InsertRangeTest above for the kinds of replacements possible. + if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) + if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI), + DivRHS)) + return R; + break; + + case Instruction::Sub: { + ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0)); + if (!LHSC) break; + const APInt &LHSV = LHSC->getValue(); + + // C1-X <u C2 -> (X|(C2-1)) == C1 + // iff C1 & (C2-1) == C2-1 + // C2 is a power of 2 + if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && + RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1)) + return new ICmpInst(ICmpInst::ICMP_EQ, + Builder->CreateOr(LHSI->getOperand(1), RHSV - 1), + LHSC); + + // C1-X >u C2 -> (X|C2) != C1 + // iff C1 & C2 == C2 + // C2+1 is a power of 2 + if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && + (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV) + return new ICmpInst(ICmpInst::ICMP_NE, + Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC); + break; + } + + case Instruction::Add: + // Fold: icmp pred (add X, C1), C2 + if (!ICI.isEquality()) { + ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!LHSC) break; + const APInt &LHSV = LHSC->getValue(); + + ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) + .subtract(LHSV); + + if (ICI.isSigned()) { + if (CR.getLower().isSignBit()) { + return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), + Builder->getInt(CR.getUpper())); + } else if (CR.getUpper().isSignBit()) { + return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), + Builder->getInt(CR.getLower())); + } + } else { + if (CR.getLower().isMinValue()) { + return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), + Builder->getInt(CR.getUpper())); + } else if (CR.getUpper().isMinValue()) { + return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), + Builder->getInt(CR.getLower())); + } + } + + // X-C1 <u C2 -> (X & -C2) == C1 + // iff C1 & (C2-1) == 0 + // C2 is a power of 2 + if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && + RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0) + return new ICmpInst(ICmpInst::ICMP_EQ, + Builder->CreateAnd(LHSI->getOperand(0), -RHSV), + ConstantExpr::getNeg(LHSC)); + + // X-C1 >u C2 -> (X & ~C2) != C1 + // iff C1 & C2 == 0 + // C2+1 is a power of 2 + if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && + (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0) + return new ICmpInst(ICmpInst::ICMP_NE, + Builder->CreateAnd(LHSI->getOperand(0), ~RHSV), + ConstantExpr::getNeg(LHSC)); + } + break; + } + + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. + if (ICI.isEquality()) { + bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + // the second operand is a constant, simplify a bit. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { + switch (BO->getOpcode()) { + case Instruction::SRem: + // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. + if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ + const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); + if (V.sgt(1) && V.isPowerOf2()) { + Value *NewRem = + Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); + return new ICmpInst(ICI.getPredicate(), NewRem, + Constant::getNullValue(BO->getType())); + } + } + break; + case Instruction::Add: + // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. + if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + ConstantExpr::getSub(RHS, BOp1C)); + } else if (RHSV == 0) { + // Replace ((add A, B) != 0) with (A != -B) if A or B is + // efficiently invertible, or if the add has just this one use. + Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); + + if (Value *NegVal = dyn_castNegVal(BOp1)) + return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); + if (Value *NegVal = dyn_castNegVal(BOp0)) + return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); + if (BO->hasOneUse()) { + Value *Neg = Builder->CreateNeg(BOp1); + Neg->takeName(BO); + return new ICmpInst(ICI.getPredicate(), BOp0, Neg); + } + } + break; + case Instruction::Xor: + // For the xor case, we can xor two constants together, eliminating + // the explicit xor. + if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + ConstantExpr::getXor(RHS, BOC)); + } else if (RHSV == 0) { + // Replace ((xor A, B) != 0) with (A != B) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + } + break; + case Instruction::Sub: + // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. + if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), + ConstantExpr::getSub(BOp0C, RHS)); + } else if (RHSV == 0) { + // Replace ((sub A, B) != 0) with (A != B) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + } + break; + case Instruction::Or: + // If bits are being or'd in that are not present in the constant we + // are comparing against, then the comparison could never succeed! + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + Constant *NotCI = ConstantExpr::getNot(RHS); + if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) + return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); + } + break; + + case Instruction::And: + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // If bits are being compared against that are and'd out, then the + // comparison can never succeed! + if ((RHSV & ~BOC->getValue()) != 0) + return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); + + // If we have ((X & C) == C), turn it into ((X & C) != 0). + if (RHS == BOC && RHSV.isPowerOf2()) + return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : + ICmpInst::ICMP_NE, LHSI, + Constant::getNullValue(RHS->getType())); + + // Don't perform the following transforms if the AND has multiple uses + if (!BO->hasOneUse()) + break; + + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 + if (BOC->getValue().isSignBit()) { + Value *X = BO->getOperand(0); + Constant *Zero = Constant::getNullValue(X->getType()); + ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; + return new ICmpInst(pred, X, Zero); + } + + // ((X & ~7) == 0) --> X < 8 + if (RHSV == 0 && isHighOnes(BOC)) { + Value *X = BO->getOperand(0); + Constant *NegX = ConstantExpr::getNeg(BOC); + ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; + return new ICmpInst(pred, X, NegX); + } + } + break; + case Instruction::Mul: + if (RHSV == 0 && BO->hasNoSignedWrap()) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // The trivial case (mul X, 0) is handled by InstSimplify + // General case : (mul X, C) != 0 iff X != 0 + // (mul X, C) == 0 iff X == 0 + if (!BOC->isZero()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + Constant::getNullValue(RHS->getType())); + } + } + break; + default: break; + } + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { + // Handle icmp {eq|ne} <intrinsic>, intcst. + switch (II->getIntrinsicID()) { + case Intrinsic::bswap: + Worklist.Add(II); + ICI.setOperand(0, II->getArgOperand(0)); + ICI.setOperand(1, Builder->getInt(RHSV.byteSwap())); + return &ICI; + case Intrinsic::ctlz: + case Intrinsic::cttz: + // ctz(A) == bitwidth(a) -> A == 0 and likewise for != + if (RHSV == RHS->getType()->getBitWidth()) { + Worklist.Add(II); + ICI.setOperand(0, II->getArgOperand(0)); + ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); + return &ICI; + } + break; + case Intrinsic::ctpop: + // popcount(A) == 0 -> A == 0 and likewise for != + if (RHS->isZero()) { + Worklist.Add(II); + ICI.setOperand(0, II->getArgOperand(0)); + ICI.setOperand(1, RHS); + return &ICI; + } + break; + default: + break; + } + } + } + return 0; +} + +/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). +/// We only handle extending casts so far. +/// +Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { + const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); + Value *LHSCIOp = LHSCI->getOperand(0); + Type *SrcTy = LHSCIOp->getType(); + Type *DestTy = LHSCI->getType(); + Value *RHSCIOp; + + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // integer type is the same size as the pointer type. + if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && + TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { + Value *RHSOp = 0; + if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { + RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); + } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { + RHSOp = RHSC->getOperand(0); + // If the pointer types don't match, insert a bitcast. + if (LHSCIOp->getType() != RHSOp->getType()) + RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); + } + + if (RHSOp) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); + } + + // The code below only handles extension cast instructions, so far. + // Enforce this. + if (LHSCI->getOpcode() != Instruction::ZExt && + LHSCI->getOpcode() != Instruction::SExt) + return 0; + + bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; + bool isSignedCmp = ICI.isSigned(); + + if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { + // Not an extension from the same type? + RHSCIOp = CI->getOperand(0); + if (RHSCIOp->getType() != LHSCIOp->getType()) + return 0; + + // If the signedness of the two casts doesn't agree (i.e. one is a sext + // and the other is a zext), then we can't handle this. + if (CI->getOpcode() != LHSCI->getOpcode()) + return 0; + + // Deal with equality cases early. + if (ICI.isEquality()) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); + + // A signed comparison of sign extended values simplifies into a + // signed comparison. + if (isSignedCmp && isSignedExt) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); + + // The other three cases all fold into an unsigned comparison. + return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); + } + + // If we aren't dealing with a constant on the RHS, exit early + ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); + if (!CI) + return 0; + + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DestTy. + Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); + Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), + Res1, DestTy); + + // If the re-extended constant didn't change... + if (Res2 == CI) { + // Deal with equality cases early. + if (ICI.isEquality()) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); + + // A signed comparison of sign extended values simplifies into a + // signed comparison. + if (isSignedExt && isSignedCmp) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); + + // The other three cases all fold into an unsigned comparison. + return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); + } + + // The re-extended constant changed so the constant cannot be represented + // in the shorter type. Consequently, we cannot emit a simple comparison. + // All the cases that fold to true or false will have already been handled + // by SimplifyICmpInst, so only deal with the tricky case. + + if (isSignedCmp || !isSignedExt) + return 0; + + // Evaluate the comparison for LT (we invert for GT below). LE and GE cases + // should have been folded away previously and not enter in here. + + // We're performing an unsigned comp with a sign extended value. + // This is true if the input is >= 0. [aka >s -1] + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); + + // Finally, return the value computed. + if (ICI.getPredicate() == ICmpInst::ICMP_ULT) + return ReplaceInstUsesWith(ICI, Result); + + assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); + return BinaryOperator::CreateNot(Result); +} + +/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: +/// I = icmp ugt (add (add A, B), CI2), CI1 +/// If this is of the form: +/// sum = a + b +/// if (sum+128 >u 255) +/// Then replace it with llvm.sadd.with.overflow.i8. +/// +static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, + ConstantInt *CI2, ConstantInt *CI1, + InstCombiner &IC) { + // The transformation we're trying to do here is to transform this into an + // llvm.sadd.with.overflow. To do this, we have to replace the original add + // with a narrower add, and discard the add-with-constant that is part of the + // range check (if we can't eliminate it, this isn't profitable). + + // In order to eliminate the add-with-constant, the compare can be its only + // use. + Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); + if (!AddWithCst->hasOneUse()) return 0; + + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. + if (!CI2->getValue().isPowerOf2()) return 0; + unsigned NewWidth = CI2->getValue().countTrailingZeros(); + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + + // The width of the new add formed is 1 more than the bias. + ++NewWidth; + + // Check to see that CI1 is an all-ones value with NewWidth bits. + if (CI1->getBitWidth() == NewWidth || + CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) + return 0; + + // This is only really a signed overflow check if the inputs have been + // sign-extended; check for that condition. For example, if CI2 is 2^31 and + // the operands of the add are 64 bits wide, we need at least 33 sign bits. + unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; + if (IC.ComputeNumSignBits(A) < NeededSignBits || + IC.ComputeNumSignBits(B) < NeededSignBits) + return 0; + + // In order to replace the original add with a narrower + // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant + // and truncates that discard the high bits of the add. Verify that this is + // the case. + Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); + for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); + UI != E; ++UI) { + if (*UI == AddWithCst) continue; + + // Only accept truncates for now. We would really like a nice recursive + // predicate like SimplifyDemandedBits, but which goes downwards the use-def + // chain to see which bits of a value are actually demanded. If the + // original add had another add which was then immediately truncated, we + // could still do the transformation. + TruncInst *TI = dyn_cast<TruncInst>(*UI); + if (TI == 0 || + TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + } + + // If the pattern matches, truncate the inputs to the narrower type and + // use the sadd_with_overflow intrinsic to efficiently compute both the + // result and the overflow bit. + Module *M = I.getParent()->getParent()->getParent(); + + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + NewType); + + InstCombiner::BuilderTy *Builder = IC.Builder; + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + Builder->SetInsertPoint(OrigAdd); + + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); + Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); + CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); + Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + + // The inner add was the result of the narrow add, zero extended to the + // wider type. Replace it with the result computed by the intrinsic. + IC.ReplaceInstUsesWith(*OrigAdd, ZExt); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "sadd.overflow"); +} + +static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, + InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. + if (!isa<IntegerType>(OrigAddV->getType())) return 0; + + // If the add is a constant expr, then we don't bother transforming it. + Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); + if (OrigAdd == 0) return 0; + + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(OrigAdd); + + Module *M = I.getParent()->getParent()->getParent(); + Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); + Value *Add = Builder->CreateExtractValue(Call, 0); + + IC.ReplaceInstUsesWith(*OrigAdd, Add); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "uadd.overflow"); +} + +// DemandedBitsLHSMask - When performing a comparison against a constant, +// it is possible that not all the bits in the LHS are demanded. This helper +// method computes the mask that IS demanded. +static APInt DemandedBitsLHSMask(ICmpInst &I, + unsigned BitWidth, bool isSignCheck) { + if (isSignCheck) + return APInt::getSignBit(BitWidth); + + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + if (!CI) return APInt::getAllOnesValue(BitWidth); + const APInt &RHS = CI->getValue(); + + switch (I.getPredicate()) { + // For a UGT comparison, we don't care about any bits that + // correspond to the trailing ones of the comparand. The value of these + // bits doesn't impact the outcome of the comparison, because any value + // greater than the RHS must differ in a bit higher than these due to carry. + case ICmpInst::ICMP_UGT: { + unsigned trailingOnes = RHS.countTrailingOnes(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); + return ~lowBitsSet; + } + + // Similarly, for a ULT comparison, we don't care about the trailing zeros. + // Any value less than the RHS must differ in a higher bit because of carries. + case ICmpInst::ICMP_ULT: { + unsigned trailingZeros = RHS.countTrailingZeros(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); + return ~lowBitsSet; + } + + default: + return APInt::getAllOnesValue(BitWidth); + } + +} + +/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst +/// should be swapped. +/// The descision is based on how many times these two operands are reused +/// as subtract operands and their positions in those instructions. +/// The rational is that several architectures use the same instruction for +/// both subtract and cmp, thus it is better if the order of those operands +/// match. +/// \return true if Op0 and Op1 should be swapped. +static bool swapMayExposeCSEOpportunities(const Value * Op0, + const Value * Op1) { + // Filter out pointer value as those cannot appears directly in subtract. + // FIXME: we may want to go through inttoptrs or bitcasts. + if (Op0->getType()->isPointerTy()) + return false; + // Count every uses of both Op0 and Op1 in a subtract. + // Each time Op0 is the first operand, count -1: swapping is bad, the + // subtract has already the same layout as the compare. + // Each time Op0 is the second operand, count +1: swapping is good, the + // subtract has a diffrent layout as the compare. + // At the end, if the benefit is greater than 0, Op0 should come second to + // expose more CSE opportunities. + int GlobalSwapBenefits = 0; + for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI); + if (!BinOp || BinOp->getOpcode() != Instruction::Sub) + continue; + // If Op0 is the first argument, this is not beneficial to swap the + // arguments. + int LocalSwapBenefits = -1; + unsigned Op1Idx = 1; + if (BinOp->getOperand(Op1Idx) == Op0) { + Op1Idx = 0; + LocalSwapBenefits = 1; + } + if (BinOp->getOperand(Op1Idx) != Op1) + continue; + GlobalSwapBenefits += LocalSwapBenefits; + } + return GlobalSwapBenefits > 0; +} + +Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { + bool Changed = false; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + unsigned Op0Cplxity = getComplexity(Op0); + unsigned Op1Cplxity = getComplexity(Op1); + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (Op0Cplxity < Op1Cplxity || + (Op0Cplxity == Op1Cplxity && + swapMayExposeCSEOpportunities(Op0, Op1))) { + I.swapOperands(); + std::swap(Op0, Op1); + Changed = true; + } + + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // comparing -val or val with non-zero is the same as just comparing val + // ie, abs(val) != 0 -> val != 0 + if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) + { + Value *Cond, *SelectTrue, *SelectFalse; + if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), + m_Value(SelectFalse)))) { + if (Value *V = dyn_castNegVal(SelectTrue)) { + if (V == SelectFalse) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + else if (Value *V = dyn_castNegVal(SelectFalse)) { + if (V == SelectTrue) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + } + } + + Type *Ty = Op0->getType(); + + // icmp's with boolean values can always be turned into bitwise operations + if (Ty->isIntegerTy(1)) { + switch (I.getPredicate()) { + default: llvm_unreachable("Invalid icmp instruction!"); + case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) + Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); + return BinaryOperator::CreateNot(Xor); + } + case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B + return BinaryOperator::CreateXor(Op0, Op1); + + case ICmpInst::ICMP_UGT: + std::swap(Op0, Op1); // Change icmp ugt -> icmp ult + // FALL THROUGH + case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); + return BinaryOperator::CreateAnd(Not, Op1); + } + case ICmpInst::ICMP_SGT: + std::swap(Op0, Op1); // Change icmp sgt -> icmp slt + // FALL THROUGH + case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); + return BinaryOperator::CreateAnd(Not, Op0); + } + case ICmpInst::ICMP_UGE: + std::swap(Op0, Op1); // Change icmp uge -> icmp ule + // FALL THROUGH + case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); + return BinaryOperator::CreateOr(Not, Op1); + } + case ICmpInst::ICMP_SGE: + std::swap(Op0, Op1); // Change icmp sge -> icmp sle + // FALL THROUGH + case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + } + + unsigned BitWidth = 0; + if (Ty->isIntOrIntVectorTy()) + BitWidth = Ty->getScalarSizeInBits(); + else if (TD) // Pointers require TD info to get their size. + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); + + bool isSignBit = false; + + // See if we are doing a comparison with a constant. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + Value *A = 0, *B = 0; + + // Match the following pattern, which is a common idiom when writing + // overflow-safe integer arithmetic function. The source performs an + // addition in wider type, and explicitly checks for overflow using + // comparisons against INT_MIN and INT_MAX. Simplify this by using the + // sadd_with_overflow intrinsic. + // + // TODO: This could probably be generalized to handle other overflow-safe + // operations if we worked out the formulas to compute the appropriate + // magic constants. + // + // sum = a + b + // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 + { + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; + } + + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) + if (I.isEquality() && CI->isZero() && + match(Op0, m_Sub(m_Value(A), m_Value(B)))) { + // (icmp cond A B) if cond is equality + return new ICmpInst(I.getPredicate(), A, B); + } + + // If we have an icmp le or icmp ge instruction, turn it into the + // appropriate icmp lt or icmp gt instruction. This allows us to rely on + // them being folded in the code below. The SimplifyICmpInst code has + // already handled the edge cases for us, so we just assert on them. + switch (I.getPredicate()) { + default: break; + case ICmpInst::ICMP_ULE: + assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE + return new ICmpInst(ICmpInst::ICMP_ULT, Op0, + Builder->getInt(CI->getValue()+1)); + case ICmpInst::ICMP_SLE: + assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + Builder->getInt(CI->getValue()+1)); + case ICmpInst::ICMP_UGE: + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + return new ICmpInst(ICmpInst::ICMP_UGT, Op0, + Builder->getInt(CI->getValue()-1)); + case ICmpInst::ICMP_SGE: + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + Builder->getInt(CI->getValue()-1)); + } + + // If this comparison is a normal comparison, it demands all + // bits, if it is a sign bit comparison, it only demands the sign bit. + bool UnusedBit; + isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); + } + + // See if we can fold the comparison based on range information we can get + // by checking whether bits are known to be zero or one in the input. + if (BitWidth != 0) { + APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); + APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); + + if (SimplifyDemandedBits(I.getOperandUse(0), + DemandedBitsLHSMask(I, BitWidth, isSignBit), + Op0KnownZero, Op0KnownOne, 0)) + return &I; + if (SimplifyDemandedBits(I.getOperandUse(1), + APInt::getAllOnesValue(BitWidth), + Op1KnownZero, Op1KnownOne, 0)) + return &I; + + // Given the known and unknown bits, compute a range that the LHS could be + // in. Compute the Min, Max and RHS values based on the known bits. For the + // EQ and NE we use unsigned values. + APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); + APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); + if (I.isSigned()) { + ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, + Op0Min, Op0Max); + ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, + Op1Min, Op1Max); + } else { + ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, + Op0Min, Op0Max); + ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, + Op1Min, Op1Max); + } + + // If Min and Max are known to be the same, then SimplifyDemandedBits + // figured out that the LHS is a constant. Just constant fold this now so + // that code below can assume that Min != Max. + if (!isa<Constant>(Op0) && Op0Min == Op0Max) + return new ICmpInst(I.getPredicate(), + ConstantInt::get(Op0->getType(), Op0Min), Op1); + if (!isa<Constant>(Op1) && Op1Min == Op1Max) + return new ICmpInst(I.getPredicate(), Op0, + ConstantInt::get(Op1->getType(), Op1Min)); + + // Based on the range information we know about the LHS, see if we can + // simplify this comparison. For example, (x&4) < 8 is always true. + switch (I.getPredicate()) { + default: llvm_unreachable("Unknown icmp opcode!"); + case ICmpInst::ICMP_EQ: { + if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) == 0" into "x != 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) == 0" into "x != 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + + break; + } + case ICmpInst::ICMP_NE: { + if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) != 0" into "x == 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) != 0" into "x == 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + + break; + } + case ICmpInst::ICMP_ULT: + if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + Builder->getInt(CI->getValue()-1)); + + // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear + if (CI->isMinValue(true)) + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + Constant::getAllOnesValue(Op0->getType())); + } + break; + case ICmpInst::ICMP_UGT: + if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + + if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + Builder->getInt(CI->getValue()+1)); + + // (x >u 2147483647) -> (x <s 0) -> true if sign bit set + if (CI->isMaxValue(true)) + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + Constant::getNullValue(Op0->getType())); + } + break; + case ICmpInst::ICMP_SLT: + if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + Builder->getInt(CI->getValue()-1)); + } + break; + case ICmpInst::ICMP_SGT: + if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + + if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + Builder->getInt(CI->getValue()+1)); + } + break; + case ICmpInst::ICMP_SGE: + assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); + if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + break; + case ICmpInst::ICMP_SLE: + assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); + if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + break; + case ICmpInst::ICMP_UGE: + assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); + if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + break; + case ICmpInst::ICMP_ULE: + assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); + if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + break; + } + + // Turn a signed comparison into an unsigned one if both operands + // are known to have the same sign. + if (I.isSigned() && + ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || + (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) + return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); + } + + // Test if the ICmpInst instruction is used exclusively by a select as + // part of a minimum or maximum operation. If so, refrain from doing + // any other folding. This helps out other analyses which understand + // non-obfuscated minimum and maximum idioms, such as ScalarEvolution + // and CodeGen. And in this case, at least one of the comparison + // operands has at least one user besides the compare (the select), + // which would often largely negate the benefit of folding anyway. + if (I.hasOneUse()) + if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin())) + if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || + (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) + return 0; + + // See if we are doing a comparison between a constant and an instruction that + // can be folded into the comparison. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp + if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) + if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) + return Res; + } + + // Handle icmp with constant (but not simple integer constant) RHS + if (Constant *RHSC = dyn_cast<Constant>(Op1)) { + if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) + switch (LHSI->getOpcode()) { + case Instruction::GetElementPtr: + // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null + if (RHSC->isNullValue() && + cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) + return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), + Constant::getNullValue(LHSI->getOperand(0)->getType())); + break; + case Instruction::PHI: + // Only fold icmp into the PHI if the phi and icmp are in the same + // block. If in the same block, we're encouraging jump threading. If + // not, we are just pessimizing the code by making an i1 phi. + if (LHSI->getParent() == I.getParent()) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + break; + case Instruction::Select: { + // If either operand of the select is a constant, we can fold the + // comparison into the select arms, which will cause one to be + // constant folded and the select turned into a bitwise or. + Value *Op1 = 0, *Op2 = 0; + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + + // We only want to perform this transformation if it will not lead to + // additional code. This is true if either both sides of the select + // fold to a constant (in which case the icmp is replaced with a select + // which will usually simplify) or this is the only user of the + // select (in which case we are trading a select+icmp for a simpler + // select+icmp). + if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { + if (!Op1) + Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); + if (!Op2) + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), + RHSC, I.getName()); + return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); + } + break; + } + case Instruction::IntToPtr: + // icmp pred inttoptr(X), null -> icmp pred X, 0 + if (RHSC->isNullValue() && TD && + TD->getIntPtrType(RHSC->getType()) == + LHSI->getOperand(0)->getType()) + return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), + Constant::getNullValue(LHSI->getOperand(0)->getType())); + break; + + case Instruction::Load: + // Try to optimize things like "A[i] > 4" to index computations. + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; + } + } + + // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) + if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) + return NI; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) + if (Instruction *NI = FoldGEPICmp(GEP, Op0, + ICmpInst::getSwappedPredicate(I.getPredicate()), I)) + return NI; + + // Test to see if the operands of the icmp are casted versions of other + // values. If the ptr->ptr cast can be stripped off both arguments, we do so + // now. + if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { + if (Op0->getType()->isPointerTy() && + (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { + // We keep moving the cast from the left operand over to the right + // operand, where it can often be eliminated completely. + Op0 = CI->getOperand(0); + + // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast + // so eliminate it as well. + if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1)) + Op1 = CI2->getOperand(0); + + // If Op1 is a constant, we can fold the cast into the constant. + if (Op0->getType() != Op1->getType()) { + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); + } else { + // Otherwise, cast the RHS right before the icmp + Op1 = Builder->CreateBitCast(Op1, Op0->getType()); + } + } + return new ICmpInst(I.getPredicate(), Op0, Op1); + } + } + + if (isa<CastInst>(Op0)) { + // Handle the special case of: icmp (cast bool to X), <cst> + // This comes up when you have code like + // int X = A < B; + // if (X) ... + // For generality, we handle any zero-extension of any operand comparison + // with a constant or another cast from the same type. + if (isa<Constant>(Op1) || isa<CastInst>(Op1)) + if (Instruction *R = visitICmpInstWithCastAndCast(I)) + return R; + } + + // Special logic for binary operators. + BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); + BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); + if (BO0 || BO1) { + CmpInst::Predicate Pred = I.getPredicate(); + bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; + if (BO0 && isa<OverflowingBinaryOperator>(BO0)) + NoOp0WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); + if (BO1 && isa<OverflowingBinaryOperator>(BO1)) + NoOp1WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); + + // Analyze the case when either Op0 or Op1 is an add instruction. + // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Add) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Add) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == Op1 || B == Op1) && NoOp0WrapProblem) + return new ICmpInst(Pred, A == Op1 ? B : A, + Constant::getNullValue(Op1->getType())); + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == Op0 || D == Op0) && NoOp1WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), + C == Op0 ? D : C); + + // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + if (A && C && (A == C || A == D || B == C || B == D) && + NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y, *Z; + if (A == C) { + // C + B == C + D -> B == D + Y = B; + Z = D; + } else if (A == D) { + // D + B == C + D -> B == C + Y = B; + Z = C; + } else if (B == C) { + // A + C == C + D -> A == D + Y = A; + Z = D; + } else { + assert(B == D); + // A + D == C + D -> A == C + Y = A; + Z = C; + } + return new ICmpInst(Pred, Y, Z); + } + + // icmp slt (X + -1), Y -> icmp sle X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); + + // icmp sge (X + -1), Y -> icmp sgt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); + + // icmp sle (X + 1), Y -> icmp slt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); + + // icmp sgt (X + 1), Y -> icmp sge X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); + + // if C1 has greater magnitude than C2: + // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y + // s.t. C3 = C1 - C2 + // + // if C2 has greater magnitude than C1: + // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) + // s.t. C3 = C2 - C1 + if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && + (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) + if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { + const APInt &AP1 = C1->getValue(); + const APInt &AP2 = C2->getValue(); + if (AP1.isNegative() == AP2.isNegative()) { + APInt AP1Abs = C1->getValue().abs(); + APInt AP2Abs = C2->getValue().abs(); + if (AP1Abs.uge(AP2Abs)) { + ConstantInt *C3 = Builder->getInt(AP1 - AP2); + Value *NewAdd = Builder->CreateNSWAdd(A, C3); + return new ICmpInst(Pred, NewAdd, C); + } else { + ConstantInt *C3 = Builder->getInt(AP2 - AP1); + Value *NewAdd = Builder->CreateNSWAdd(C, C3); + return new ICmpInst(Pred, A, NewAdd); + } + } + } + + + // Analyze the case when either Op0 or Op1 is a sub instruction. + // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). + A = 0; B = 0; C = 0; D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Sub) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Sub) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + if (A == Op1 && NoOp0WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); + + // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + if (C == Op0 && NoOp1WrapProblem) + return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); + + // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, A, C); + + // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. + if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, D, B); + + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { + default: break; + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && + BO0->hasOneUse() && BO1->hasOneUse() && + BO0->getOperand(1) == BO1->getOperand(1)) { + switch (BO0->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + if (CI->getValue().isSignBit()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); + } + + if (CI->isMaxValue(true)) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + Pred = I.getSwappedPredicate(Pred); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); + } + } + break; + case Instruction::Mul: + if (!I.isEquality()) + break; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask + // Mask = -1 >> count-trailing-zeros(Cst). + if (!CI->isZero() && !CI->isOne()) { + const APInt &AP = CI->getValue(); + ConstantInt *Mask = ConstantInt::get(I.getContext(), + APInt::getLowBitsSet(AP.getBitWidth(), + AP.getBitWidth() - + AP.countTrailingZeros())); + Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + return new ICmpInst(I.getPredicate(), And1, And2); + } + } + break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() || !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } + } + } + } + + { Value *A, *B; + // Transform (A & ~B) == 0 --> (A & B) != 0 + // and (A & ~B) != 0 --> (A & B) == 0 + // if A is a power of 2. + if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality()) + return new ICmpInst(I.getInversePredicate(), + Builder->CreateAnd(A, B), + Op1); + + // ~x < ~y --> y < x + // ~x < cst --> ~cst < x + if (match(Op0, m_Not(m_Value(A)))) { + if (match(Op1, m_Not(m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, A); + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) + return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + } + + // (a+b) <u a --> llvm.uadd.with.overflow. + // (a+b) <u b --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; + + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; + } + + if (I.isEquality()) { + Value *A, *B, *C, *D; + + if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 + Value *OtherVal = A == Op1 ? B : A; + return new ICmpInst(I.getPredicate(), OtherVal, + Constant::getNullValue(A->getType())); + } + + if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { + // A^c1 == C^c2 --> A == C^(c1^c2) + ConstantInt *C1, *C2; + if (match(B, m_ConstantInt(C1)) && + match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { + Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); + Value *Xor = Builder->CreateXor(C, NC); + return new ICmpInst(I.getPredicate(), A, Xor); + } + + // A^B == A^D -> B == D + if (A == C) return new ICmpInst(I.getPredicate(), B, D); + if (A == D) return new ICmpInst(I.getPredicate(), B, C); + if (B == C) return new ICmpInst(I.getPredicate(), A, D); + if (B == D) return new ICmpInst(I.getPredicate(), A, C); + } + } + + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) { + // A == (A^B) -> B == 0 + Value *OtherVal = A == Op0 ? B : A; + return new ICmpInst(I.getPredicate(), OtherVal, + Constant::getNullValue(A->getType())); + } + + // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { + Value *X = 0, *Y = 0, *Z = 0; + + if (A == C) { + X = B; Y = D; Z = A; + } else if (A == D) { + X = B; Y = C; Z = A; + } else if (B == C) { + X = A; Y = D; Z = B; + } else if (B == D) { + X = A; Y = C; Z = B; + } + + if (X) { // Build (X^Y) & Z + Op1 = Builder->CreateXor(X, Y); + Op1 = Builder->CreateAnd(Op1, Z); + I.setOperand(0, Op1); + I.setOperand(1, Constant::getNullValue(Op1->getType())); + return &I; + } + } + + // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B) + // and (B & (1<<X)-1) == (zext A) --> A == (trunc B) + ConstantInt *Cst1; + if ((Op0->hasOneUse() && + match(Op0, m_ZExt(m_Value(A))) && + match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || + (Op1->hasOneUse() && + match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && + match(Op1, m_ZExt(m_Value(A))))) { + APInt Pow2 = Cst1->getValue() + 1; + if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && + Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) + return new ICmpInst(I.getPredicate(), A, + Builder->CreateTrunc(B, A->getType())); + } + + // (A >> C) == (B >> C) --> (A^B) u< (1 << C) + // For lshr and ashr pairs. + if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || + (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { + unsigned TypeBits = Cst1->getBitWidth(); + unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); + if (ShAmt < TypeBits && ShAmt != 0) { + ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE + ? ICmpInst::ICMP_UGE + : ICmpInst::ICMP_ULT; + Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); + return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); + } + } + + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to + // "icmp (and X, mask), cst" + uint64_t ShAmt = 0; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } + } + + { + Value *X; ConstantInt *Cst; + // icmp X+Cst, X + if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) + return FoldICmpAddOpCst(I, X, Cst, I.getPredicate()); + + // icmp X, X+Cst + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) + return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); + } + return Changed ? &I : 0; +} + +/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. +/// +Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, + Instruction *LHSI, + Constant *RHSC) { + if (!isa<ConstantFP>(RHSC)) return 0; + const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); + + // Get the width of the mantissa. We don't want to hack on conversions that + // might lose information from the integer, e.g. "i64 -> float" + int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); + if (MantissaWidth == -1) return 0; // Unknown. + + // Check to see that the input is converted from an integer type that is small + // enough that preserves all bits. TODO: check here for "known" sign bits. + // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. + unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); + + // If this is a uitofp instruction, we need an extra bit to hold the sign. + bool LHSUnsigned = isa<UIToFPInst>(LHSI); + if (LHSUnsigned) + ++InputSize; + + // If the conversion would lose info, don't hack on this. + if ((int)InputSize > MantissaWidth) + return 0; + + // Otherwise, we can potentially simplify the comparison. We know that it + // will always come through as an integer value and we know the constant is + // not a NAN (it would have been previously simplified). + assert(!RHS.isNaN() && "NaN comparison not already folded!"); + + ICmpInst::Predicate Pred; + switch (I.getPredicate()) { + default: llvm_unreachable("Unexpected predicate!"); + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_OEQ: + Pred = ICmpInst::ICMP_EQ; + break; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_OGT: + Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; + break; + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OGE: + Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; + break; + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_OLT: + Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; + break; + case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_OLE: + Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; + break; + case FCmpInst::FCMP_UNE: + case FCmpInst::FCMP_ONE: + Pred = ICmpInst::ICMP_NE; + break; + case FCmpInst::FCMP_ORD: + return ReplaceInstUsesWith(I, Builder->getTrue()); + case FCmpInst::FCMP_UNO: + return ReplaceInstUsesWith(I, Builder->getFalse()); + } + + IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); + + // Now we know that the APFloat is a normal number, zero or inf. + + // See if the FP constant is too large for the integer. For example, + // comparing an i8 to 300.0. + unsigned IntWidth = IntTy->getScalarSizeInBits(); + + if (!LHSUnsigned) { + // If the RHS value is > SignedMax, fold the comparison. This handles +INF + // and large values. + APFloat SMax(RHS.getSemantics()); + SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || + Pred == ICmpInst::ICMP_SLE) + return ReplaceInstUsesWith(I, Builder->getTrue()); + return ReplaceInstUsesWith(I, Builder->getFalse()); + } + } else { + // If the RHS value is > UnsignedMax, fold the comparison. This handles + // +INF and large values. + APFloat UMax(RHS.getSemantics()); + UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, + APFloat::rmNearestTiesToEven); + if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || + Pred == ICmpInst::ICMP_ULE) + return ReplaceInstUsesWith(I, Builder->getTrue()); + return ReplaceInstUsesWith(I, Builder->getFalse()); + } + } + + if (!LHSUnsigned) { + // See if the RHS value is < SignedMin. + APFloat SMin(RHS.getSemantics()); + SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || + Pred == ICmpInst::ICMP_SGE) + return ReplaceInstUsesWith(I, Builder->getTrue()); + return ReplaceInstUsesWith(I, Builder->getFalse()); + } + } else { + // See if the RHS value is < UnsignedMin. + APFloat SMin(RHS.getSemantics()); + SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || + Pred == ICmpInst::ICMP_UGE) + return ReplaceInstUsesWith(I, Builder->getTrue()); + return ReplaceInstUsesWith(I, Builder->getFalse()); + } + } + + // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or + // [0, UMAX], but it may still be fractional. See if it is fractional by + // casting the FP value to the integer value and back, checking for equality. + // Don't do this for zero, because -0.0 is not fractional. + Constant *RHSInt = LHSUnsigned + ? ConstantExpr::getFPToUI(RHSC, IntTy) + : ConstantExpr::getFPToSI(RHSC, IntTy); + if (!RHS.isZero()) { + bool Equal = LHSUnsigned + ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC + : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; + if (!Equal) { + // If we had a comparison against a fractional value, we have to adjust + // the compare predicate and sometimes the value. RHSC is rounded towards + // zero at this point. + switch (Pred) { + default: llvm_unreachable("Unexpected integer comparison!"); + case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true + return ReplaceInstUsesWith(I, Builder->getTrue()); + case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false + return ReplaceInstUsesWith(I, Builder->getFalse()); + case ICmpInst::ICMP_ULE: + // (float)int <= 4.4 --> int <= 4 + // (float)int <= -4.4 --> false + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, Builder->getFalse()); + break; + case ICmpInst::ICMP_SLE: + // (float)int <= 4.4 --> int <= 4 + // (float)int <= -4.4 --> int < -4 + if (RHS.isNegative()) + Pred = ICmpInst::ICMP_SLT; + break; + case ICmpInst::ICMP_ULT: + // (float)int < -4.4 --> false + // (float)int < 4.4 --> int <= 4 + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, Builder->getFalse()); + Pred = ICmpInst::ICMP_ULE; + break; + case ICmpInst::ICMP_SLT: + // (float)int < -4.4 --> int < -4 + // (float)int < 4.4 --> int <= 4 + if (!RHS.isNegative()) + Pred = ICmpInst::ICMP_SLE; + break; + case ICmpInst::ICMP_UGT: + // (float)int > 4.4 --> int > 4 + // (float)int > -4.4 --> true + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, Builder->getTrue()); + break; + case ICmpInst::ICMP_SGT: + // (float)int > 4.4 --> int > 4 + // (float)int > -4.4 --> int >= -4 + if (RHS.isNegative()) + Pred = ICmpInst::ICMP_SGE; + break; + case ICmpInst::ICMP_UGE: + // (float)int >= -4.4 --> true + // (float)int >= 4.4 --> int > 4 + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, Builder->getTrue()); + Pred = ICmpInst::ICMP_UGT; + break; + case ICmpInst::ICMP_SGE: + // (float)int >= -4.4 --> int >= -4 + // (float)int >= 4.4 --> int > 4 + if (!RHS.isNegative()) + Pred = ICmpInst::ICMP_SGT; + break; + } + } + } + + // Lower this FP comparison into an appropriate integer version of the + // comparison. + return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); +} + +Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Simplify 'fcmp pred X, X' + if (Op0 == Op1) { + switch (I.getPredicate()) { + default: llvm_unreachable("Unknown predicate!"); + case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) + case FCmpInst::FCMP_ULT: // True if unordered or less than + case FCmpInst::FCMP_UGT: // True if unordered or greater than + case FCmpInst::FCMP_UNE: // True if unordered or not equal + // Canonicalize these to be 'fcmp uno %X, 0.0'. + I.setPredicate(FCmpInst::FCMP_UNO); + I.setOperand(1, Constant::getNullValue(Op0->getType())); + return &I; + + case FCmpInst::FCMP_ORD: // True if ordered (no nans) + case FCmpInst::FCMP_OEQ: // True if ordered and equal + case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal + case FCmpInst::FCMP_OLE: // True if ordered and less than or equal + // Canonicalize these to be 'fcmp ord %X, 0.0'. + I.setPredicate(FCmpInst::FCMP_ORD); + I.setOperand(1, Constant::getNullValue(Op0->getType())); + return &I; + } + } + + // Handle fcmp with constant RHS + if (Constant *RHSC = dyn_cast<Constant>(Op1)) { + if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) + switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast<FPExtInst>(LHSI); + ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); + if (!RHSF) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here. + if (LHSExt->getSrcTy()->isHalfTy()) + Sem = &APFloat::IEEEhalf; + else if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) + Sem = &APFloat::PPCDoubleDouble; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. Zero is a special case + // that's OK to convert. + APFloat Fabs = F; + Fabs.clearSign(); + if (!Lossy && + ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) || Fabs.isZero())) + + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } + case Instruction::PHI: + // Only fold fcmp into the PHI if the phi and fcmp are in the same + // block. If in the same block, we're encouraging jump threading. If + // not, we are just pessimizing the code by making an i1 phi. + if (LHSI->getParent() == I.getParent()) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + break; + case Instruction::SIToFP: + case Instruction::UIToFP: + if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) + return NV; + break; + case Instruction::Select: { + // If either operand of the select is a constant, we can fold the + // comparison into the select arms, which will cause one to be + // constant folded and the select turned into a bitwise or. + Value *Op1 = 0, *Op2 = 0; + if (LHSI->hasOneUse()) { + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { + // Fold the known value into the constant operand. + Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); + // Insert a new FCmp of the other select operand. + Op2 = Builder->CreateFCmp(I.getPredicate(), + LHSI->getOperand(2), RHSC, I.getName()); + } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { + // Fold the known value into the constant operand. + Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); + // Insert a new FCmp of the other select operand. + Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); + } + } + + if (Op1) + return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); + break; + } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } + case Instruction::Load: + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; + case Instruction::Call: { + CallInst *CI = cast<CallInst>(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. + if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } + } + } + + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) + if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + + return Changed ? &I : 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp new file mode 100644 index 000000000000..4c861b3fd095 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -0,0 +1,820 @@ +//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for load, store and alloca. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); + +/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to +/// some part of a constant global variable. This intentionally only accepts +/// constant expressions because we can't rewrite arbitrary instructions. +static bool pointsToConstantGlobal(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return GV->isConstant(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + return pointsToConstantGlobal(CE->getOperand(0)); + return false; +} + +/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +/// pointer to an alloca. Ignore any reads of the pointer, return false if we +/// see any stores or other unknown uses. If we see pointer arithmetic, keep +/// track of whether it moves the pointer (with IsOffset) but otherwise traverse +/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +/// the alloca, and if the source pointer is a pointer to a constant global, we +/// can optimize this. +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete, + bool IsOffset = false) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + // If uses of the bitcast are ok, we are ok. + if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + return false; + continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + if (!isOnlyCopiedFromConstantGlobal( + GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) + return false; + continue; + } + + if (CallSite CS = U) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(UI)) + continue; + + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (CS.isByValArgument(ArgNo)) + continue; + } + + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); + continue; + } + } + + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. + MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) + return false; + + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (UI.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } + + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; + + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; + + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (UI.getOperandNo() != 0) return false; + + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; + + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } + return true; +} + +/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only +/// modified by a copy from a constant global. If we can prove this, we can +/// replace any uses of the alloca with uses of the global directly. +static MemTransferInst * +isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVectorImpl<Instruction *> &ToDelete) { + MemTransferInst *TheCopy = 0; + if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) + return TheCopy; + return 0; +} + +Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + // Ensure that the alloca array size argument has type intptr_t, so that + // any casting is exposed early. + if (TD) { + Type *IntPtrTy = TD->getIntPtrType(AI.getType()); + if (AI.getArraySize()->getType() != IntPtrTy) { + Value *V = Builder->CreateIntCast(AI.getArraySize(), + IntPtrTy, false); + AI.setOperand(0, V); + return &AI; + } + } + + // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 + if (AI.isArrayAllocation()) { // Check C != 1 + if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { + Type *NewTy = + ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); + AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); + New->setAlignment(AI.getAlignment()); + + // Scan to the end of the allocation instructions, to skip over a block of + // allocas if possible...also skip interleaved debug info + // + BasicBlock::iterator It = New; + while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; + + // Now that I is pointing to the first non-allocation-inst in the block, + // insert our getelementptr instruction... + // + Type *IdxTy = TD + ? TD->getIntPtrType(AI.getType()) + : Type::getInt64Ty(AI.getContext()); + Value *NullIdx = Constant::getNullValue(IdxTy); + Value *Idx[2] = { NullIdx, NullIdx }; + Instruction *GEP = + GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); + InsertNewInstBefore(GEP, *It); + + // Now make everything use the getelementptr instead of the original + // allocation. + return ReplaceInstUsesWith(AI, GEP); + } else if (isa<UndefValue>(AI.getArraySize())) { + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + } + } + + if (TD && AI.getAllocatedType()->isSized()) { + // If the alignment is 0 (unspecified), assign it the preferred alignment. + if (AI.getAlignment() == 0) + AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); + + // Move all alloca's of zero byte objects to the entry block and merge them + // together. Note that we only do this for alloca's, because malloc should + // allocate and return a unique pointer, even for a zero byte allocation. + if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) { + // For a zero sized alloca there is no point in doing an array allocation. + // This is helpful if the array size is a complicated expression not used + // elsewhere. + if (AI.isArrayAllocation()) { + AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); + return &AI; + } + + // Get the first instruction in the entry block. + BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); + Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); + if (FirstInst != &AI) { + // If the entry block doesn't start with a zero-size alloca then move + // this one to the start of the entry block. There is no problem with + // dominance as the array size was forced to a constant earlier already. + AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); + if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || + TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + AI.moveBefore(FirstInst); + return &AI; + } + + // If the alignment of the entry block alloca is 0 (unspecified), + // assign it the preferred alignment. + if (EntryAI->getAlignment() == 0) + EntryAI->setAlignment( + TD->getPrefTypeAlignment(EntryAI->getAllocatedType())); + // Replace this zero-sized alloca with the one at the start of the entry + // block after ensuring that the address will be aligned enough for both + // types. + unsigned MaxAlign = std::max(EntryAI->getAlignment(), + AI.getAlignment()); + EntryAI->setAlignment(MaxAlign); + if (AI.getType() != EntryAI->getType()) + return new BitCastInst(EntryAI, AI.getType()); + return ReplaceInstUsesWith(AI, EntryAI); + } + } + } + + if (AI.getAlignment()) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(), + AI.getAlignment(), TD); + if (AI.getAlignment() <= SourceAlign) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); + Instruction *NewI = ReplaceInstUsesWith(AI, Cast); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } + } + } + + // At last, use the generic allocation site handler to aggressively remove + // unused allocas. + return visitAllocSite(AI); +} + + +/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. +static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, + const DataLayout *TD) { + User *CI = cast<User>(LI.getOperand(0)); + Value *CastOp = CI->getOperand(0); + + PointerType *DestTy = cast<PointerType>(CI->getType()); + Type *DestPTy = DestTy->getElementType(); + if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { + + // If the address spaces don't match, don't eliminate the cast. + if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) + return 0; + + Type *SrcPTy = SrcTy->getElementType(); + + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + DestPTy->isVectorTy()) { + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants. + if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) + if (Constant *CSrc = dyn_cast<Constant>(CastOp)) + if (ASrcTy->getNumElements() != 0) { + Type *IdxTy = TD + ? TD->getIntPtrType(SrcTy) + : Type::getInt64Ty(SrcTy->getContext()); + Value *Idx = Constant::getNullValue(IdxTy); + Value *Idxs[2] = { Idx, Idx }; + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); + SrcTy = cast<PointerType>(CastOp->getType()); + SrcPTy = SrcTy->getElementType(); + } + + if (IC.getDataLayout() && + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + SrcPTy->isVectorTy()) && + // Do not allow turning this into a load of an integer, which is then + // casted to a pointer, this pessimizes pointer analysis a lot. + (SrcPTy->isPtrOrPtrVectorTy() == + LI.getType()->isPtrOrPtrVectorTy()) && + IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == + IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { + + // Okay, we are casting from one integer or pointer type to another of + // the same size. Instead of casting the pointer before the load, cast + // the result of the loaded value. + LoadInst *NewLoad = + IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); + NewLoad->setAlignment(LI.getAlignment()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); + // Now cast the result of the load. + return new BitCastInst(NewLoad, LI.getType()); + } + } + } + return 0; +} + +Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { + Value *Op = LI.getOperand(0); + + // Attempt to improve the alignment. + if (TD) { + unsigned KnownAlign = + getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD); + unsigned LoadAlign = LI.getAlignment(); + unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : + TD->getABITypeAlignment(LI.getType()); + + if (KnownAlign > EffectiveLoadAlign) + LI.setAlignment(KnownAlign); + else if (LoadAlign == 0) + LI.setAlignment(EffectiveLoadAlign); + } + + // load (cast X) --> cast (load X) iff safe. + if (isa<CastInst>(Op)) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + + // None of the following transforms are legal for volatile/atomic loads. + // FIXME: Some of it is okay for atomic loads; needs refactoring. + if (!LI.isSimple()) return 0; + + // Do really simple store-to-load forwarding and load CSE, to catch cases + // where there are several consecutive memory accesses to the same location, + // separated by a few arithmetic operations. + BasicBlock::iterator BBI = &LI; + if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) + return ReplaceInstUsesWith(LI, AvailableVal); + + // load(gep null, ...) -> unreachable + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { + const Value *GEPI0 = GEPI->getOperand(0); + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ + // Insert a new store to null instruction before the load to indicate + // that this code is not reachable. We do this instead of inserting + // an unreachable instruction directly because we cannot modify the + // CFG. + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + } + + // load null/undef -> unreachable + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. We do this instead of inserting an + // unreachable instruction directly because we cannot modify the CFG. + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + + // Instcombine load (constantexpr_cast global) -> cast (load global) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) + if (CE->isCast()) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + + if (Op->hasOneUse()) { + // Change select and PHI nodes to select values instead of addresses: this + // helps alias analysis out a lot, allows many others simplifications, and + // exposes redundancy in the code. + // + // Note that we cannot do the transformation unless we know that the + // introduced loads cannot trap! Something like this is valid as long as + // the condition is always false: load (select bool %C, int* null, int* %G), + // but it would not be valid if we transformed it to load from null + // unconditionally. + // + if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { + // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). + unsigned Align = LI.getAlignment(); + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { + LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); + V1->setAlignment(Align); + V2->setAlignment(Align); + return SelectInst::Create(SI->getCondition(), V1, V2); + } + + // load (select (cond, null, P)) -> load P + if (Constant *C = dyn_cast<Constant>(SI->getOperand(1))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(2)); + return &LI; + } + + // load (select (cond, P, null)) -> load P + if (Constant *C = dyn_cast<Constant>(SI->getOperand(2))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(1)); + return &LI; + } + } + } + return 0; +} + +/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P +/// when possible. This makes it generally easy to do alias analysis and/or +/// SROA/mem2reg of the memory object. +static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { + User *CI = cast<User>(SI.getOperand(1)); + Value *CastOp = CI->getOperand(0); + + Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); + PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); + if (SrcTy == 0) return 0; + + Type *SrcPTy = SrcTy->getElementType(); + + if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) + return 0; + + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" + /// to its first element. This allows us to handle things like: + /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) + /// on 32-bit hosts. + SmallVector<Value*, 4> NewGEPIndices; + + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants. + if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { + // Index through pointer. + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); + NewGEPIndices.push_back(Zero); + + while (1) { + if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { + if (!STy->getNumElements()) /* Struct can be empty {} */ + break; + NewGEPIndices.push_back(Zero); + SrcPTy = STy->getElementType(0); + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { + NewGEPIndices.push_back(Zero); + SrcPTy = ATy->getElementType(); + } else { + break; + } + } + + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + } + + if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) + return 0; + + // If the pointers point into different address spaces or if they point to + // values with different sizes, we can't do the transformation. + if (!IC.getDataLayout() || + SrcTy->getAddressSpace() != + cast<PointerType>(CI->getType())->getAddressSpace() || + IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != + IC.getDataLayout()->getTypeSizeInBits(DestPTy)) + return 0; + + // Okay, we are casting from one integer or pointer type to another of + // the same size. Instead of casting the pointer before + // the store, cast the value to be stored. + Value *NewCast; + Value *SIOp0 = SI.getOperand(0); + Instruction::CastOps opcode = Instruction::BitCast; + Type* CastSrcTy = SIOp0->getType(); + Type* CastDstTy = SrcPTy; + if (CastDstTy->isPointerTy()) { + if (CastSrcTy->isIntegerTy()) + opcode = Instruction::IntToPtr; + } else if (CastDstTy->isIntegerTy()) { + if (SIOp0->getType()->isPointerTy()) + opcode = Instruction::PtrToInt; + } + + // SIOp0 is a pointer to aggregate and this is a store to the first field, + // emit a GEP to index into its first field. + if (!NewGEPIndices.empty()) + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); + + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, + SIOp0->getName()+".c"); + SI.setOperand(0, NewCast); + SI.setOperand(1, CastOp); + return &SI; +} + +/// equivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool equivalentAddressValues(Value *A, Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come form identical arithmetic instructions. + // This uses isIdenticalToWhenDefined instead of isIdenticalTo because + // its only used to compare two uses within the same basic block, which + // means that they'll always either have the same value or one of them + // will have an undefined value. + if (isa<BinaryOperator>(A) || + isa<CastInst>(A) || + isa<PHINode>(A) || + isa<GetElementPtrInst>(A)) + if (Instruction *BI = dyn_cast<Instruction>(B)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { + Value *Val = SI.getOperand(0); + Value *Ptr = SI.getOperand(1); + + // Attempt to improve the alignment. + if (TD) { + unsigned KnownAlign = + getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()), + TD); + unsigned StoreAlign = SI.getAlignment(); + unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : + TD->getABITypeAlignment(Val->getType()); + + if (KnownAlign > EffectiveStoreAlign) + SI.setAlignment(KnownAlign); + else if (StoreAlign == 0) + SI.setAlignment(EffectiveStoreAlign); + } + + // Don't hack volatile/atomic stores. + // FIXME: Some bits are legal for atomic stores; needs refactoring. + if (!SI.isSimple()) return 0; + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. + if (Ptr->hasOneUse()) { + if (isa<AllocaInst>(Ptr)) + return EraseInstFromFunction(SI); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + if (isa<AllocaInst>(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) + return EraseInstFromFunction(SI); + } + } + } + + // Do really simple DSE, to catch cases where there are several consecutive + // stores to the same location, separated by a few arithmetic operations. This + // situation often occurs with bitfield accesses. + BasicBlock::iterator BBI = &SI; + for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; + --ScanInsts) { + --BBI; + // Don't count debug info directives, lest they affect codegen, + // and we skip pointer-to-pointer bitcasts, which are NOPs. + if (isa<DbgInfoIntrinsic>(BBI) || + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { + ScanInsts++; + continue; + } + + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { + // Prev store isn't volatile, and stores to the same location? + if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { + ++NumDeadStore; + ++BBI; + EraseInstFromFunction(*PrevSI); + continue; + } + break; + } + + // If this is a load, we have to stop. However, if the loaded value is from + // the pointer we're loading and is producing the pointer we're storing, + // then *this* store is dead (X = load P; store X -> P). + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && + LI->isSimple()) + return EraseInstFromFunction(SI); + + // Otherwise, this is a load from some other location. Stores before it + // may not be dead. + break; + } + + // Don't skip over loads or things that can modify memory. + if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) + break; + } + + // store X, null -> turns into 'unreachable' in SimplifyCFG + if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { + if (!isa<UndefValue>(Val)) { + SI.setOperand(0, UndefValue::get(Val->getType())); + if (Instruction *U = dyn_cast<Instruction>(Val)) + Worklist.Add(U); // Dropped a use. + } + return 0; // Do not modify these! + } + + // store undef, Ptr -> noop + if (isa<UndefValue>(Val)) + return EraseInstFromFunction(SI); + + // If the pointer destination is a cast, see if we can fold the cast into the + // source instead. + if (isa<CastInst>(Ptr)) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->isCast()) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + + + // If this store is the last instruction in the basic block (possibly + // excepting debug info instructions), and if the block ends with an + // unconditional branch, try to move it to the successor block. + BBI = &SI; + do { + ++BBI; + } while (isa<DbgInfoIntrinsic>(BBI) || + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); + if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) + if (BI->isUnconditional()) + if (SimplifyStoreAtEndOfBlock(SI)) + return 0; // xform done! + + return 0; +} + +/// SimplifyStoreAtEndOfBlock - Turn things like: +/// if () { *P = v1; } else { *P = v2 } +/// into a phi node with a store in the successor. +/// +/// Simplify things like: +/// *P = v1; if () { *P = v2; } +/// into a phi node with a store in the successor. +/// +bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { + BasicBlock *StoreBB = SI.getParent(); + + // Check to see if the successor block has exactly two incoming edges. If + // so, see if the other predecessor contains a store to the same location. + // if so, insert a PHI node (if needed) and move the stores down. + BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); + + // Determine whether Dest has exactly two predecessors and, if so, compute + // the other predecessor. + pred_iterator PI = pred_begin(DestBB); + BasicBlock *P = *PI; + BasicBlock *OtherBB = 0; + + if (P != StoreBB) + OtherBB = P; + + if (++PI == pred_end(DestBB)) + return false; + + P = *PI; + if (P != StoreBB) { + if (OtherBB) + return false; + OtherBB = P; + } + if (++PI != pred_end(DestBB)) + return false; + + // Bail out if all the relevant blocks aren't distinct (this can happen, + // for example, if SI is in an infinite loop) + if (StoreBB == DestBB || OtherBB == DestBB) + return false; + + // Verify that the other block ends in a branch and is not otherwise empty. + BasicBlock::iterator BBI = OtherBB->getTerminator(); + BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); + if (!OtherBr || BBI == OtherBB->begin()) + return false; + + // If the other block ends in an unconditional branch, check for the 'if then + // else' case. there is an instruction before the branch. + StoreInst *OtherStore = 0; + if (OtherBr->isUnconditional()) { + --BBI; + // Skip over debugging info. + while (isa<DbgInfoIntrinsic>(BBI) || + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { + if (BBI==OtherBB->begin()) + return false; + --BBI; + } + // If this isn't a store, isn't a store to the same location, or is not the + // right kind of store, bail out. + OtherStore = dyn_cast<StoreInst>(BBI); + if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || + !SI.isSameOperationAs(OtherStore)) + return false; + } else { + // Otherwise, the other block ended with a conditional branch. If one of the + // destinations is StoreBB, then we have the if/then case. + if (OtherBr->getSuccessor(0) != StoreBB && + OtherBr->getSuccessor(1) != StoreBB) + return false; + + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an + // if/then triangle. See if there is a store to the same ptr as SI that + // lives in OtherBB. + for (;; --BBI) { + // Check to see if we find the matching store. + if ((OtherStore = dyn_cast<StoreInst>(BBI))) { + if (OtherStore->getOperand(1) != SI.getOperand(1) || + !SI.isSameOperationAs(OtherStore)) + return false; + break; + } + // If we find something that may be using or overwriting the stored + // value, or if we run out of instructions, we can't do the xform. + if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || + BBI == OtherBB->begin()) + return false; + } + + // In order to eliminate the store in OtherBr, we have to + // make sure nothing reads or overwrites the stored value in + // StoreBB. + for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { + // FIXME: This should really be AA driven. + if (I->mayReadFromMemory() || I->mayWriteToMemory()) + return false; + } + } + + // Insert a PHI node now if we need it. + Value *MergedVal = OtherStore->getOperand(0); + if (MergedVal != SI.getOperand(0)) { + PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); + PN->addIncoming(SI.getOperand(0), SI.getParent()); + PN->addIncoming(OtherStore->getOperand(0), OtherBB); + MergedVal = InsertNewInstBefore(PN, DestBB->front()); + } + + // Advance to a place where it is safe to insert the new store and + // insert it. + BBI = DestBB->getFirstInsertionPt(); + StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), + SI.isVolatile(), + SI.getAlignment(), + SI.getOrdering(), + SI.getSynchScope()); + InsertNewInstBefore(NewSI, *BBI); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); + + // If the two stores had the same TBAA tag, preserve it. + if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa)) + if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag, + OtherStore->getMetadata(LLVMContext::MD_tbaa)))) + NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + + // Nuke the old stores. + EraseInstFromFunction(SI); + EraseInstFromFunction(*OtherStore); + return true; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp new file mode 100644 index 000000000000..a7595482eedb --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -0,0 +1,1259 @@ +//===- InstCombineMulDivRem.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv, +// srem, urem, frem. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + + +/// simplifyValueKnownNonZero - The specific integer value is used in a context +/// where it is known to be non-zero. If this allows us to simplify the +/// computation, do so and return the new operand, otherwise return null. +static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { + // If V has multiple uses, then we would have to do more analysis to determine + // if this is safe. For example, the use could be in dynamically unreached + // code. + if (!V->hasOneUse()) return 0; + + bool MadeChange = false; + + // ((1 << A) >>u B) --> (1 << (A-B)) + // Because V cannot be zero, we know that B is less than A. + Value *A = 0, *B = 0, *PowerOf2 = 0; + if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), + m_Value(B))) && + // The "1" can be any value known to be a power of 2. + isKnownToBeAPowerOfTwo(PowerOf2)) { + A = IC.Builder->CreateSub(A, B); + return IC.Builder->CreateShl(PowerOf2, A); + } + + // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it + // inexact. Similarly for <<. + if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) + if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) { + // We know that this is an exact/nuw shift and that the input is a + // non-zero context as well. + if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) { + I->setOperand(0, V2); + MadeChange = true; + } + + if (I->getOpcode() == Instruction::LShr && !I->isExact()) { + I->setIsExact(); + MadeChange = true; + } + + if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) { + I->setHasNoUnsignedWrap(); + MadeChange = true; + } + } + + // TODO: Lots more we could do here: + // If V is a phi node, we can call this on each of its operands. + // "select cond, X, 0" can simplify to "X". + + return MadeChange ? V : 0; +} + + +/// MultiplyOverflows - True if the multiply can not be expressed in an int +/// this size. +static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { + uint32_t W = C1->getBitWidth(); + APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); + if (sign) { + LHSExt = LHSExt.sext(W * 2); + RHSExt = RHSExt.sext(W * 2); + } else { + LHSExt = LHSExt.zext(W * 2); + RHSExt = RHSExt.zext(W * 2); + } + + APInt MulExt = LHSExt * RHSExt; + + if (!sign) + return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); + + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); + APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); + return MulExt.slt(Min) || MulExt.sgt(Max); +} + +/// \brief A helper routine of InstCombiner::visitMul(). +/// +/// If C is a vector of known powers of 2, then this function returns +/// a new vector obtained from C replacing each element with its logBase2. +/// Return a null pointer otherwise. +static Constant *getLogBase2Vector(ConstantDataVector *CV) { + const APInt *IVal; + SmallVector<Constant *, 4> Elts; + + for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) { + Constant *Elt = CV->getElementAsConstant(I); + if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2()) + return 0; + Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2())); + } + + return ConstantVector::get(Elts); +} + +Instruction *InstCombiner::visitMul(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyMulInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + + if (match(Op1, m_AllOnes())) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + // Also allow combining multiply instructions on vectors. + { + Value *NewOp; + Constant *C1, *C2; + const APInt *IVal; + if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_Constant(C2)), + m_Constant(C1))) && + match(C1, m_APInt(IVal))) + // ((X << C1)*C2) == (X * (C2 << C1)) + return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2)); + + if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) { + Constant *NewCst = 0; + if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2()) + // Replace X*(2^C) with X << C, where C is either a scalar or a splat. + NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2()); + else if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(C1)) + // Replace X*(2^C) with X << C, where C is a vector of known + // constant powers of 2. + NewCst = getLogBase2Vector(CV); + + if (NewCst) { + BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst); + if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap(); + if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); + return Shl; + } + } + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. + { Value *X; ConstantInt *C1; + if (Op0->hasOneUse() && + match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) { + Value *Add = Builder->CreateMul(X, CI); + return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI)); + } + } + + // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n + // (Y + const) * (-(2**n)) -> (-constY) * (2**n), for positive nonzero n + // The "* (2**n)" thus becomes a potential shifting opportunity. + { + const APInt & Val = CI->getValue(); + const APInt &PosVal = Val.abs(); + if (Val.isNegative() && PosVal.isPowerOf2()) { + Value *X = 0, *Y = 0; + if (Op0->hasOneUse()) { + ConstantInt *C1; + Value *Sub = 0; + if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) + Sub = Builder->CreateSub(X, Y, "suba"); + else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) + Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc"); + if (Sub) + return + BinaryOperator::CreateMul(Sub, + ConstantInt::get(Y->getType(), PosVal)); + } + } + } + } + + // Simplify mul instructions with a constant RHS. + if (isa<Constant>(Op1)) { + // Try to fold constant mul into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(Op1)) + return BinaryOperator::CreateMul(Op0v, Op1v); + + // (X / Y) * Y = X - (X % Y) + // (X / Y) * -Y = (X % Y) - X + { + Value *Op1C = Op1; + BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); + if (!BO || + (BO->getOpcode() != Instruction::UDiv && + BO->getOpcode() != Instruction::SDiv)) { + Op1C = Op0; + BO = dyn_cast<BinaryOperator>(Op1); + } + Value *Neg = dyn_castNegVal(Op1C); + if (BO && BO->hasOneUse() && + (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && + (BO->getOpcode() == Instruction::UDiv || + BO->getOpcode() == Instruction::SDiv)) { + Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); + + // If the division is exact, X % Y is zero, so we end up with X or -X. + if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) + if (SDiv->isExact()) { + if (Op1BO == Op1C) + return ReplaceInstUsesWith(I, Op0BO); + return BinaryOperator::CreateNeg(Op0BO); + } + + Value *Rem; + if (BO->getOpcode() == Instruction::UDiv) + Rem = Builder->CreateURem(Op0BO, Op1BO); + else + Rem = Builder->CreateSRem(Op0BO, Op1BO); + Rem->takeName(BO); + + if (Op1BO == Op1C) + return BinaryOperator::CreateSub(Op0BO, Rem); + return BinaryOperator::CreateSub(Rem, Op0BO); + } + } + + /// i1 mul -> i1 and. + if (I.getType()->isIntegerTy(1)) + return BinaryOperator::CreateAnd(Op0, Op1); + + // X*(1 << Y) --> X << Y + // (1 << Y)*X --> X << Y + { + Value *Y; + if (match(Op0, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op1, Y); + if (match(Op1, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op0, Y); + } + + // If one of the operands of the multiply is a cast from a boolean value, then + // we know the bool is either zero or one, so this is a 'masking' multiply. + // X * Y (where Y is 0 or 1) -> X & (0-Y) + if (!I.getType()->isVectorTy()) { + // -2 is "-1 << 1" so it is all bits set except the low one. + APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); + + Value *BoolCast = 0, *OtherOp = 0; + if (MaskedValueIsZero(Op0, Negative2)) + BoolCast = Op0, OtherOp = Op1; + else if (MaskedValueIsZero(Op1, Negative2)) + BoolCast = Op1, OtherOp = Op0; + + if (BoolCast) { + Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + BoolCast); + return BinaryOperator::CreateAnd(V, OtherOp); + } + } + + return Changed ? &I : 0; +} + +// +// Detect pattern: +// +// log2(Y*0.5) +// +// And check for corresponding fast math flags +// + +static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { + + if (!Op->hasOneUse()) + return; + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op); + if (!II) + return; + if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra()) + return; + Log2 = II; + + Value *OpLog2Of = II->getArgOperand(0); + if (!OpLog2Of->hasOneUse()) + return; + + Instruction *I = dyn_cast<Instruction>(OpLog2Of); + if (!I) + return; + if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return; + + ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0)); + if (CFP && CFP->isExactlyValue(0.5)) { + Y = I->getOperand(1); + return; + } + CFP = dyn_cast<ConstantFP>(I->getOperand(1)); + if (CFP && CFP->isExactlyValue(0.5)) + Y = I->getOperand(0); +} + +/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns +/// true iff the given value is FMul or FDiv with one and only one operand +/// being a normal constant (i.e. not Zero/NaN/Infinity). +static bool isFMulOrFDivWithConstant(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I || (I->getOpcode() != Instruction::FMul && + I->getOpcode() != Instruction::FDiv)) + return false; + + ConstantFP *C0 = dyn_cast<ConstantFP>(I->getOperand(0)); + ConstantFP *C1 = dyn_cast<ConstantFP>(I->getOperand(1)); + + if (C0 && C1) + return false; + + return (C0 && C0->getValueAPF().isFiniteNonZero()) || + (C1 && C1->getValueAPF().isFiniteNonZero()); +} + +static bool isNormalFp(const ConstantFP *C) { + const APFloat &Flt = C->getValueAPF(); + return Flt.isNormal(); +} + +/// foldFMulConst() is a helper routine of InstCombiner::visitFMul(). +/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand +/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true). +/// This function is to simplify "FMulOrDiv * C" and returns the +/// resulting expression. Note that this function could return NULL in +/// case the constants cannot be folded into a normal floating-point. +/// +Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, + Instruction *InsertBefore) { + assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid"); + + Value *Opnd0 = FMulOrDiv->getOperand(0); + Value *Opnd1 = FMulOrDiv->getOperand(1); + + ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0); + ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1); + + BinaryOperator *R = 0; + + // (X * C0) * C => X * (C0*C) + if (FMulOrDiv->getOpcode() == Instruction::FMul) { + Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C); + if (isNormalFp(cast<ConstantFP>(F))) + R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F); + } else { + if (C0) { + // (C0 / X) * C => (C0 * C) / X + if (FMulOrDiv->hasOneUse()) { + // It would otherwise introduce another div. + ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C)); + if (isNormalFp(F)) + R = BinaryOperator::CreateFDiv(F, Opnd1); + } + } else { + // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal + ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1)); + if (isNormalFp(F)) { + R = BinaryOperator::CreateFMul(Opnd0, F); + } else { + // (X / C1) * C => X / (C1/C) + Constant *F = ConstantExpr::getFDiv(C1, C); + if (isNormalFp(cast<ConstantFP>(F))) + R = BinaryOperator::CreateFDiv(Opnd0, F); + } + } + } + + if (R) { + R->setHasUnsafeAlgebra(true); + InsertNewInstWith(R, *InsertBefore); + } + + return R; +} + +Instruction *InstCombiner::visitFMul(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa<Constant>(Op0)) + std::swap(Op0, Op1); + + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + + bool AllowReassociate = I.hasUnsafeAlgebra(); + + // Simplify mul instructions with a constant RHS. + if (isa<Constant>(Op1)) { + // Try to fold constant mul into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + ConstantFP *C = dyn_cast<ConstantFP>(Op1); + if (C && AllowReassociate && C->getValueAPF().isFiniteNonZero()) { + // Let MDC denote an expression in one of these forms: + // X * C, C/X, X/C, where C is a constant. + // + // Try to simplify "MDC * Constant" + if (isFMulOrFDivWithConstant(Op0)) { + Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I); + if (V) + return ReplaceInstUsesWith(I, V); + } + + // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C) + Instruction *FAddSub = dyn_cast<Instruction>(Op0); + if (FAddSub && + (FAddSub->getOpcode() == Instruction::FAdd || + FAddSub->getOpcode() == Instruction::FSub)) { + Value *Opnd0 = FAddSub->getOperand(0); + Value *Opnd1 = FAddSub->getOperand(1); + ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0); + ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1); + bool Swap = false; + if (C0) { + std::swap(C0, C1); + std::swap(Opnd0, Opnd1); + Swap = true; + } + + if (C1 && C1->getValueAPF().isFiniteNonZero() && + isFMulOrFDivWithConstant(Opnd0)) { + Value *M1 = ConstantExpr::getFMul(C1, C); + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? + foldFMulConst(cast<Instruction>(Opnd0), C, &I) : + 0; + if (M0 && M1) { + if (Swap && FAddSub->getOpcode() == Instruction::FSub) + std::swap(M0, M1); + + Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd) + ? BinaryOperator::CreateFAdd(M0, M1) + : BinaryOperator::CreateFSub(M0, M1); + RI->copyFastMathFlags(&I); + return RI; + } + } + } + } + } + + + // Under unsafe algebra do: + // X * log2(0.5*Y) = X*log2(Y) - X + if (I.hasUnsafeAlgebra()) { + Value *OpX = NULL; + Value *OpY = NULL; + IntrinsicInst *Log2; + detectLog2OfHalf(Op0, OpY, Log2); + if (OpY) { + OpX = Op1; + } else { + detectLog2OfHalf(Op1, OpY, Log2); + if (OpY) { + OpX = Op0; + } + } + // if pattern detected emit alternate sequence + if (OpX && OpY) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(Log2->getFastMathFlags()); + Log2->setArgOperand(0, OpY); + Value *FMulVal = Builder->CreateFMul(OpX, Log2); + Value *FSub = Builder->CreateFSub(FMulVal, OpX); + FSub->takeName(&I); + return ReplaceInstUsesWith(I, FSub); + } + } + + // Handle symmetric situation in a 2-iteration loop + Value *Opnd0 = Op0; + Value *Opnd1 = Op1; + for (int i = 0; i < 2; i++) { + bool IgnoreZeroSign = I.hasNoSignedZeros(); + if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(I.getFastMathFlags()); + + Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); + Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); + + // -X * -Y => X*Y + if (N1) + return BinaryOperator::CreateFMul(N0, N1); + + if (Opnd0->hasOneUse()) { + // -X * Y => -(X*Y) (Promote negation as high as possible) + Value *T = Builder->CreateFMul(N0, Opnd1); + Value *Neg = Builder->CreateFNeg(T); + Neg->takeName(&I); + return ReplaceInstUsesWith(I, Neg); + } + } + + // (X*Y) * X => (X*X) * Y where Y != X + // The purpose is two-fold: + // 1) to form a power expression (of X). + // 2) potentially shorten the critical path: After transformation, the + // latency of the instruction Y is amortized by the expression of X*X, + // and therefore Y is in a "less critical" position compared to what it + // was before the transformation. + // + if (AllowReassociate) { + Value *Opnd0_0, *Opnd0_1; + if (Opnd0->hasOneUse() && + match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { + Value *Y = 0; + if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) + Y = Opnd0_1; + else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) + Y = Opnd0_0; + + if (Y) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(I.getFastMathFlags()); + Value *T = Builder->CreateFMul(Opnd1, Opnd1); + + Value *R = Builder->CreateFMul(T, Y); + R->takeName(&I); + return ReplaceInstUsesWith(I, R); + } + } + } + + // B * (uitofp i1 C) -> select C, B, 0 + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *B, *C; + if (!match(RHS, m_UIToFP(m_Value(C)))) + std::swap(LHS, RHS); + + if (match(RHS, m_UIToFP(m_Value(C))) && C->getType()->isIntegerTy(1)) { + B = LHS; + Value *Zero = ConstantFP::getNegativeZero(B->getType()); + return SelectInst::Create(C, B, Zero); + } + } + + // A * (1 - uitofp i1 C) -> select C, 0, A + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *A, *C; + if (!match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C))))) + std::swap(LHS, RHS); + + if (match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C)))) && + C->getType()->isIntegerTy(1)) { + A = LHS; + Value *Zero = ConstantFP::getNegativeZero(A->getType()); + return SelectInst::Create(C, Zero, A); + } + } + + if (!isa<Constant>(Op1)) + std::swap(Opnd0, Opnd1); + else + break; + } + + return Changed ? &I : 0; +} + +/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select +/// instruction. +bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { + SelectInst *SI = cast<SelectInst>(I.getOperand(1)); + + // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y + int NonNullOperand = -1; + if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) + if (ST->isNullValue()) + NonNullOperand = 2; + // div/rem X, (Cond ? Y : 0) -> div/rem X, Y + if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) + if (ST->isNullValue()) + NonNullOperand = 1; + + if (NonNullOperand == -1) + return false; + + Value *SelectCond = SI->getOperand(0); + + // Change the div/rem to use 'Y' instead of the select. + I.setOperand(1, SI->getOperand(NonNullOperand)); + + // Okay, we know we replace the operand of the div/rem with 'Y' with no + // problem. However, the select, or the condition of the select may have + // multiple uses. Based on our knowledge that the operand must be non-zero, + // propagate the known value for the select into other uses of it, and + // propagate a known value of the condition into its other users. + + // If the select and condition only have a single use, don't bother with this, + // early exit. + if (SI->use_empty() && SelectCond->hasOneUse()) + return true; + + // Scan the current block backward, looking for other uses of SI. + BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); + + while (BBI != BBFront) { + --BBI; + // If we found a call to a function, we can't assume it will return, so + // information from below it cannot be propagated above it. + if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) + break; + + // Replace uses of the select or its condition with the known values. + for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); + I != E; ++I) { + if (*I == SI) { + *I = SI->getOperand(NonNullOperand); + Worklist.Add(BBI); + } else if (*I == SelectCond) { + *I = Builder->getInt1(NonNullOperand == 1); + Worklist.Add(BBI); + } + } + + // If we past the instruction, quit looking for it. + if (&*BBI == SI) + SI = 0; + if (&*BBI == SelectCond) + SelectCond = 0; + + // If we ran out of things to eliminate, break out of the loop. + if (SelectCond == 0 && SI == 0) + break; + + } + return true; +} + + +/// This function implements the transforms common to both integer division +/// instructions (udiv and sdiv). It is called by the visitors to those integer +/// division instructions. +/// @brief Common integer divide transforms +Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // The RHS is known non-zero. + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) { + I.setOperand(1, V); + return &I; + } + + // Handle cases involving: [su]div X, (select Cond, Y, Z) + // This does not apply for fdiv. + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + // (X / C1) / C2 -> X / (C1*C2) + if (Instruction *LHS = dyn_cast<Instruction>(Op0)) + if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) + if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { + if (MultiplyOverflows(RHS, LHSRHS, + I.getOpcode()==Instruction::SDiv)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + ConstantExpr::getMul(RHS, LHSRHS)); + } + + if (!RHS->isZero()) { // avoid X udiv 0 + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + } + + // See if we can fold away this div instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y + Value *X = 0, *Z = 0; + if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 + bool isSigned = I.getOpcode() == Instruction::SDiv; + if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || + (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1))))) + return BinaryOperator::Create(I.getOpcode(), X, Op1); + } + + return 0; +} + +/// dyn_castZExtVal - Checks if V is a zext or constant that can +/// be truncated to Ty without losing bits. +static Value *dyn_castZExtVal(Value *V, Type *Ty) { + if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { + if (Z->getSrcTy() == Ty) + return Z->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) { + if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) + return ConstantExpr::getTrunc(C, Ty); + } + return 0; +} + +namespace { +const unsigned MaxDepth = 6; +typedef Instruction *(*FoldUDivOperandCb)(Value *Op0, Value *Op1, + const BinaryOperator &I, + InstCombiner &IC); + +/// \brief Used to maintain state for visitUDivOperand(). +struct UDivFoldAction { + FoldUDivOperandCb FoldAction; ///< Informs visitUDiv() how to fold this + ///< operand. This can be zero if this action + ///< joins two actions together. + + Value *OperandToFold; ///< Which operand to fold. + union { + Instruction *FoldResult; ///< The instruction returned when FoldAction is + ///< invoked. + + size_t SelectLHSIdx; ///< Stores the LHS action index if this action + ///< joins two actions together. + }; + + UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand) + : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {} + UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS) + : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {} +}; +} + +// X udiv 2^C -> X >> C +static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, + const BinaryOperator &I, InstCombiner &IC) { + const APInt &C = cast<Constant>(Op1)->getUniqueInteger(); + BinaryOperator *LShr = BinaryOperator::CreateLShr( + Op0, ConstantInt::get(Op0->getType(), C.logBase2())); + if (I.isExact()) LShr->setIsExact(); + return LShr; +} + +// X udiv C, where C >= signbit +static Instruction *foldUDivNegCst(Value *Op0, Value *Op1, + const BinaryOperator &I, InstCombiner &IC) { + Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1)); + + return SelectInst::Create(ICI, Constant::getNullValue(I.getType()), + ConstantInt::get(I.getType(), 1)); +} + +// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) +static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, + InstCombiner &IC) { + Instruction *ShiftLeft = cast<Instruction>(Op1); + if (isa<ZExtInst>(ShiftLeft)) + ShiftLeft = cast<Instruction>(ShiftLeft->getOperand(0)); + + const APInt &CI = + cast<Constant>(ShiftLeft->getOperand(0))->getUniqueInteger(); + Value *N = ShiftLeft->getOperand(1); + if (CI != 1) + N = IC.Builder->CreateAdd(N, ConstantInt::get(N->getType(), CI.logBase2())); + if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) + N = IC.Builder->CreateZExt(N, Z->getDestTy()); + BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); + if (I.isExact()) LShr->setIsExact(); + return LShr; +} + +// \brief Recursively visits the possible right hand operands of a udiv +// instruction, seeing through select instructions, to determine if we can +// replace the udiv with something simpler. If we find that an operand is not +// able to simplify the udiv, we abort the entire transformation. +static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, + SmallVectorImpl<UDivFoldAction> &Actions, + unsigned Depth = 0) { + // Check to see if this is an unsigned division with an exact power of 2, + // if so, convert to a right shift. + if (match(Op1, m_Power2())) { + Actions.push_back(UDivFoldAction(foldUDivPow2Cst, Op1)); + return Actions.size(); + } + + if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) + // X udiv C, where C >= signbit + if (C->getValue().isNegative()) { + Actions.push_back(UDivFoldAction(foldUDivNegCst, C)); + return Actions.size(); + } + + // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) + if (match(Op1, m_Shl(m_Power2(), m_Value())) || + match(Op1, m_ZExt(m_Shl(m_Power2(), m_Value())))) { + Actions.push_back(UDivFoldAction(foldUDivShl, Op1)); + return Actions.size(); + } + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return 0; + + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions)) + if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) { + Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1)); + return Actions.size(); + } + + return 0; +} + +Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyUDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) { + Value *X; + ConstantInt *C1; + if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { + APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); + return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); + } + } + + // (zext A) udiv (zext B) --> zext (A udiv B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", + I.isExact()), + I.getType()); + + // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) + SmallVector<UDivFoldAction, 6> UDivActions; + if (visitUDivOperand(Op0, Op1, I, UDivActions)) + for (unsigned i = 0, e = UDivActions.size(); i != e; ++i) { + FoldUDivOperandCb Action = UDivActions[i].FoldAction; + Value *ActionOp1 = UDivActions[i].OperandToFold; + Instruction *Inst; + if (Action) + Inst = Action(Op0, ActionOp1, I, *this); + else { + // This action joins two actions together. The RHS of this action is + // simply the last action we processed, we saved the LHS action index in + // the joining action. + size_t SelectRHSIdx = i - 1; + Value *SelectRHS = UDivActions[SelectRHSIdx].FoldResult; + size_t SelectLHSIdx = UDivActions[i].SelectLHSIdx; + Value *SelectLHS = UDivActions[SelectLHSIdx].FoldResult; + Inst = SelectInst::Create(cast<SelectInst>(ActionOp1)->getCondition(), + SelectLHS, SelectRHS); + } + + // If this is the last action to process, return it to the InstCombiner. + // Otherwise, we insert it before the UDiv and record it so that we may + // use it as part of a joining action (i.e., a SelectInst). + if (e - i != 1) { + Inst->insertBefore(&I); + UDivActions[i].FoldResult = Inst; + } else + return Inst; + } + + return 0; +} + +Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifySDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + // sdiv X, -1 == -X + if (RHS->isAllOnesValue()) + return BinaryOperator::CreateNeg(Op0); + + // sdiv X, C --> ashr exact X, log2(C) + if (I.isExact() && RHS->getValue().isNonNegative() && + RHS->getValue().isPowerOf2()) { + Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), + RHS->getValue().exactLogBase2()); + return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName()); + } + + // -X/C --> X/-C provided the negation doesn't overflow. + if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) + if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap()) + return BinaryOperator::CreateSDiv(Sub->getOperand(1), + ConstantExpr::getNeg(RHS)); + } + + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a udiv. + if (I.getType()->isIntegerTy()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op0, Mask)) { + if (MaskedValueIsZero(Op1, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + } + } + + return 0; +} + +/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special +/// FP value and: +/// 1) 1/C is exact, or +/// 2) reciprocal is allowed. +/// If the conversion was successful, the simplified expression "X * 1/C" is +/// returned; otherwise, NULL is returned. +/// +static Instruction *CvtFDivConstToReciprocal(Value *Dividend, + ConstantFP *Divisor, + bool AllowReciprocal) { + const APFloat &FpVal = Divisor->getValueAPF(); + APFloat Reciprocal(FpVal.getSemantics()); + bool Cvt = FpVal.getExactInverse(&Reciprocal); + + if (!Cvt && AllowReciprocal && FpVal.isFiniteNonZero()) { + Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); + (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); + Cvt = !Reciprocal.isDenormal(); + } + + if (!Cvt) + return 0; + + ConstantFP *R; + R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Dividend, R); +} + +Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + if (isa<Constant>(Op0)) + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + bool AllowReassociate = I.hasUnsafeAlgebra(); + bool AllowReciprocal = I.hasAllowReciprocal(); + + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (AllowReassociate) { + ConstantFP *C1 = 0; + ConstantFP *C2 = Op1C; + Value *X; + Instruction *Res = 0; + + if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) { + // (X*C1)/C2 => X * (C1/C2) + // + Constant *C = ConstantExpr::getFDiv(C1, C2); + const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); + if (F.isNormal()) + Res = BinaryOperator::CreateFMul(X, C); + } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) { + // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed] + // + Constant *C = ConstantExpr::getFMul(C1, C2); + const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); + if (F.isNormal()) { + Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), + AllowReciprocal); + if (!Res) + Res = BinaryOperator::CreateFDiv(X, C); + } + } + + if (Res) { + Res->setFastMathFlags(I.getFastMathFlags()); + return Res; + } + } + + // X / C => X * 1/C + if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal)) + return T; + + return 0; + } + + if (AllowReassociate && isa<ConstantFP>(Op0)) { + ConstantFP *C1 = cast<ConstantFP>(Op0), *C2; + Constant *Fold = 0; + Value *X; + bool CreateDiv = true; + + // C1 / (X*C2) => (C1/C2) / X + if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2)))) + Fold = ConstantExpr::getFDiv(C1, C2); + else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) { + // C1 / (X/C2) => (C1*C2) / X + Fold = ConstantExpr::getFMul(C1, C2); + } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) { + // C1 / (C2/X) => (C1/C2) * X + Fold = ConstantExpr::getFDiv(C1, C2); + CreateDiv = false; + } + + if (Fold) { + const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF(); + if (FoldC.isNormal()) { + Instruction *R = CreateDiv ? + BinaryOperator::CreateFDiv(Fold, X) : + BinaryOperator::CreateFMul(X, Fold); + R->setFastMathFlags(I.getFastMathFlags()); + return R; + } + } + return 0; + } + + if (AllowReassociate) { + Value *X, *Y; + Value *NewInst = 0; + Instruction *SimpR = 0; + + if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { + // (X/Y) / Z => X / (Y*Z) + // + if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) { + NewInst = Builder->CreateFMul(Y, Op1); + SimpR = BinaryOperator::CreateFDiv(X, NewInst); + } + } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) { + // Z / (X/Y) => Z*Y / X + // + if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) { + NewInst = Builder->CreateFMul(Op0, Y); + SimpR = BinaryOperator::CreateFDiv(NewInst, X); + } + } + + if (NewInst) { + if (Instruction *T = dyn_cast<Instruction>(NewInst)) + T->setDebugLoc(I.getDebugLoc()); + SimpR->setFastMathFlags(I.getFastMathFlags()); + return SimpR; + } + } + + return 0; +} + +/// This function implements the transforms common to both integer remainder +/// instructions (urem and srem). It is called by the visitors to those integer +/// remainder instructions. +/// @brief Common integer remainder transforms +Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // The RHS is known non-zero. + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) { + I.setOperand(1, V); + return &I; + } + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + if (isa<ConstantInt>(Op1)) { + if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { + if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } else if (isa<PHINode>(Op0I)) { + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + // See if we can fold away this rem instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; + } + } + + return 0; +} + +Instruction *InstCombiner::visitURem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyURemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *common = commonIRemTransforms(I)) + return common; + + // (zext A) urem (zext B) --> zext (A urem B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + I.getType()); + + // X urem Y -> X and Y-1, where Y is a power of 2, + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) { + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(Op1, N1); + return BinaryOperator::CreateAnd(Op0, Add); + } + + // 1 urem X -> zext(X != 1) + if (match(Op0, m_One())) { + Value *Cmp = Builder->CreateICmpNE(Op1, Op0); + Value *Ext = Builder->CreateZExt(Cmp, I.getType()); + return ReplaceInstUsesWith(I, Ext); + } + + return 0; +} + +Instruction *InstCombiner::visitSRem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifySRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle the integer rem common cases + if (Instruction *Common = commonIRemTransforms(I)) + return Common; + + if (Value *RHSNeg = dyn_castNegVal(Op1)) + if (!isa<Constant>(RHSNeg) || + (isa<ConstantInt>(RHSNeg) && + cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) { + // X % -Y -> X % Y + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, RHSNeg); + return &I; + } + + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a urem. + if (I.getType()->isIntegerTy()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); + } + } + + // If it's a constant vector, flip any negative values positive. + if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) { + Constant *C = cast<Constant>(Op1); + unsigned VWidth = C->getType()->getVectorNumElements(); + + bool hasNegative = false; + bool hasMissing = false; + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) { + hasMissing = true; + break; + } + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt)) + if (RHS->isNegative()) + hasNegative = true; + } + + if (hasNegative && !hasMissing) { + SmallVector<Constant *, 16> Elts(VWidth); + for (unsigned i = 0; i != VWidth; ++i) { + Elts[i] = C->getAggregateElement(i); // Handle undef, etc. + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) { + if (RHS->isNegative()) + Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); + } + } + + Constant *NewRHSV = ConstantVector::get(Elts); + if (NewRHSV != C) { // Don't loop on -MININT + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, NewRHSV); + return &I; + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitFRem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + return 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp new file mode 100644 index 000000000000..4c6d0c43cd97 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -0,0 +1,902 @@ +//===- InstCombinePHI.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitPHINode function. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/DataLayout.h" +using namespace llvm; + +/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] +/// and if a/b/c and the add's all have a single use, turn this into a phi +/// and a single binop. +Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { + Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); + assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)); + unsigned Opc = FirstInst->getOpcode(); + Value *LHSVal = FirstInst->getOperand(0); + Value *RHSVal = FirstInst->getOperand(1); + + Type *LHSType = LHSVal->getType(); + Type *RHSType = RHSVal->getType(); + + bool isNUW = false, isNSW = false, isExact = false; + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); + + // Scan to see if all operands are the same opcode, and all have one use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + if (!I || I->getOpcode() != Opc || !I->hasOneUse() || + // Verify type of the LHS matches so we don't fold cmp's of different + // types. + I->getOperand(0)->getType() != LHSType || + I->getOperand(1)->getType() != RHSType) + return 0; + + // If they are CmpInst instructions, check their predicates + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) + return 0; + + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); + + // Keep track of which operand needs a phi node. + if (I->getOperand(0) != LHSVal) LHSVal = 0; + if (I->getOperand(1) != RHSVal) RHSVal = 0; + } + + // If both LHS and RHS would need a PHI, don't do this transformation, + // because it would increase the number of PHIs entering the block, + // which leads to higher register pressure. This is especially + // bad when the PHIs are in the header of a loop. + if (!LHSVal && !RHSVal) + return 0; + + // Otherwise, this is safe to transform! + + Value *InLHS = FirstInst->getOperand(0); + Value *InRHS = FirstInst->getOperand(1); + PHINode *NewLHS = 0, *NewRHS = 0; + if (LHSVal == 0) { + NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), + FirstInst->getOperand(0)->getName() + ".pn"); + NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); + InsertNewInstBefore(NewLHS, PN); + LHSVal = NewLHS; + } + + if (RHSVal == 0) { + NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), + FirstInst->getOperand(1)->getName() + ".pn"); + NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); + InsertNewInstBefore(NewRHS, PN); + RHSVal = NewRHS; + } + + // Add all operands to the new PHIs. + if (NewLHS || NewRHS) { + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); + if (NewLHS) { + Value *NewInLHS = InInst->getOperand(0); + NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); + } + if (NewRHS) { + Value *NewInRHS = InInst->getOperand(1); + NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); + } + } + } + + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) { + CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; + } + + BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst); + BinaryOperator *NewBinOp = + BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); + if (isNUW) NewBinOp->setHasNoUnsignedWrap(); + if (isNSW) NewBinOp->setHasNoSignedWrap(); + if (isExact) NewBinOp->setIsExact(); + NewBinOp->setDebugLoc(FirstInst->getDebugLoc()); + return NewBinOp; +} + +Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { + GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); + + SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), + FirstInst->op_end()); + // This is true if all GEP bases are allocas and if all indices into them are + // constants. + bool AllBasePointersAreAllocas = true; + + // We don't want to replace this phi if the replacement would require + // more than one phi, which leads to higher register pressure. This is + // especially bad when the PHIs are in the header of a loop. + bool NeededPhi = false; + + bool AllInBounds = true; + + // Scan to see if all operands are the same opcode, and all have one use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); + if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || + GEP->getNumOperands() != FirstInst->getNumOperands()) + return 0; + + AllInBounds &= GEP->isInBounds(); + + // Keep track of whether or not all GEPs are of alloca pointers. + if (AllBasePointersAreAllocas && + (!isa<AllocaInst>(GEP->getOperand(0)) || + !GEP->hasAllConstantIndices())) + AllBasePointersAreAllocas = false; + + // Compare the operand lists. + for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { + if (FirstInst->getOperand(op) == GEP->getOperand(op)) + continue; + + // Don't merge two GEPs when two operands differ (introducing phi nodes) + // if one of the PHIs has a constant for the index. The index may be + // substantially cheaper to compute for the constants, so making it a + // variable index could pessimize the path. This also handles the case + // for struct indices, which must always be constant. + if (isa<ConstantInt>(FirstInst->getOperand(op)) || + isa<ConstantInt>(GEP->getOperand(op))) + return 0; + + if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) + return 0; + + // If we already needed a PHI for an earlier operand, and another operand + // also requires a PHI, we'd be introducing more PHIs than we're + // eliminating, which increases register pressure on entry to the PHI's + // block. + if (NeededPhi) + return 0; + + FixedOperands[op] = 0; // Needs a PHI. + NeededPhi = true; + } + } + + // If all of the base pointers of the PHI'd GEPs are from allocas, don't + // bother doing this transformation. At best, this will just save a bit of + // offset calculation, but all the predecessors will have to materialize the + // stack address into a register anyway. We'd actually rather *clone* the + // load up into the predecessors so that we have a load of a gep of an alloca, + // which can usually all be folded into the load. + if (AllBasePointersAreAllocas) + return 0; + + // Otherwise, this is safe to transform. Insert PHI nodes for each operand + // that is variable. + SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); + + bool HasAnyPHIs = false; + for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { + if (FixedOperands[i]) continue; // operand doesn't need a phi. + Value *FirstOp = FirstInst->getOperand(i); + PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, + FirstOp->getName()+".pn"); + InsertNewInstBefore(NewPN, PN); + + NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); + OperandPhis[i] = NewPN; + FixedOperands[i] = NewPN; + HasAnyPHIs = true; + } + + + // Add all operands to the new PHIs. + if (HasAnyPHIs) { + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); + BasicBlock *InBB = PN.getIncomingBlock(i); + + for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) + if (PHINode *OpPhi = OperandPhis[op]) + OpPhi->addIncoming(InGEP->getOperand(op), InBB); + } + } + + Value *Base = FixedOperands[0]; + GetElementPtrInst *NewGEP = + GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); + if (AllInBounds) NewGEP->setIsInBounds(); + NewGEP->setDebugLoc(FirstInst->getDebugLoc()); + return NewGEP; +} + + +/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to +/// sink the load out of the block that defines it. This means that it must be +/// obvious the value of the load is not changed from the point of the load to +/// the end of the block it is in. +/// +/// Finally, it is safe, but not profitable, to sink a load targeting a +/// non-address-taken alloca. Doing so will cause us to not promote the alloca +/// to a register. +static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { + BasicBlock::iterator BBI = L, E = L->getParent()->end(); + + for (++BBI; BBI != E; ++BBI) + if (BBI->mayWriteToMemory()) + return false; + + // Check for non-address taken alloca. If not address-taken already, it isn't + // profitable to do this xform. + if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { + bool isAddressTaken = false; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) { + User *U = *UI; + if (isa<LoadInst>(U)) continue; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // If storing TO the alloca, then the address isn't taken. + if (SI->getOperand(1) == AI) continue; + } + isAddressTaken = true; + break; + } + + if (!isAddressTaken && AI->isStaticAlloca()) + return false; + } + + // If this load is a load from a GEP with a constant offset from an alloca, + // then we don't want to sink it. In its present form, it will be + // load [constant stack offset]. Sinking it will cause us to have to + // materialize the stack addresses in each predecessor in a register only to + // do a shared load from register in the successor. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0))) + if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) + if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) + return false; + + return true; +} + +Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { + LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); + + // FIXME: This is overconservative; this transform is allowed in some cases + // for atomic operations. + if (FirstLI->isAtomic()) + return 0; + + // When processing loads, we need to propagate two bits of information to the + // sunk load: whether it is volatile, and what its alignment is. We currently + // don't sink loads when some have their alignment specified and some don't. + // visitLoadInst will propagate an alignment onto the load when TD is around, + // and if TD isn't around, we can't handle the mixed case. + bool isVolatile = FirstLI->isVolatile(); + unsigned LoadAlignment = FirstLI->getAlignment(); + unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); + + // We can't sink the load if the loaded value could be modified between the + // load and the PHI. + if (FirstLI->getParent() != PN.getIncomingBlock(0) || + !isSafeAndProfitableToSinkLoad(FirstLI)) + return 0; + + // If the PHI is of volatile loads and the load block has multiple + // successors, sinking it would remove a load of the volatile value from + // the path through the other successor. + if (isVolatile && + FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) + return 0; + + // Check to see if all arguments are the same operation. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); + if (!LI || !LI->hasOneUse()) + return 0; + + // We can't sink the load if the loaded value could be modified between + // the load and the PHI. + if (LI->isVolatile() != isVolatile || + LI->getParent() != PN.getIncomingBlock(i) || + LI->getPointerAddressSpace() != LoadAddrSpace || + !isSafeAndProfitableToSinkLoad(LI)) + return 0; + + // If some of the loads have an alignment specified but not all of them, + // we can't do the transformation. + if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) + return 0; + + LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); + + // If the PHI is of volatile loads and the load block has multiple + // successors, sinking it would remove a load of the volatile value from + // the path through the other successor. + if (isVolatile && + LI->getParent()->getTerminator()->getNumSuccessors() != 1) + return 0; + } + + // Okay, they are all the same operation. Create a new PHI node of the + // correct type, and PHI together all of the LHS's of the instructions. + PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getNumIncomingValues(), + PN.getName()+".in"); + + Value *InVal = FirstLI->getOperand(0); + NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + + // Add all operands to the new PHI. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); + if (NewInVal != InVal) + InVal = 0; + NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + } + + Value *PhiVal; + if (InVal) { + // The new PHI unions all of the same values together. This is really + // common, so we handle it intelligently here for compile-time speed. + PhiVal = InVal; + delete NewPN; + } else { + InsertNewInstBefore(NewPN, PN); + PhiVal = NewPN; + } + + // If this was a volatile load that we are merging, make sure to loop through + // and mark all the input loads as non-volatile. If we don't do this, we will + // insert a new volatile load and the old ones will not be deletable. + if (isVolatile) + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); + + LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); + NewLI->setDebugLoc(FirstLI->getDebugLoc()); + return NewLI; +} + + + +/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" +/// operator and they all are only used by the PHI, PHI together their +/// inputs, and do the operation once, to the result of the PHI. +Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { + Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); + + if (isa<GetElementPtrInst>(FirstInst)) + return FoldPHIArgGEPIntoPHI(PN); + if (isa<LoadInst>(FirstInst)) + return FoldPHIArgLoadIntoPHI(PN); + + // Scan the instruction, looking for input operations that can be folded away. + // If all input operands to the phi are the same instruction (e.g. a cast from + // the same type or "+42") we can pull the operation through the PHI, reducing + // code size and simplifying code. + Constant *ConstantOp = 0; + Type *CastSrcTy = 0; + bool isNUW = false, isNSW = false, isExact = false; + + if (isa<CastInst>(FirstInst)) { + CastSrcTy = FirstInst->getOperand(0)->getType(); + + // Be careful about transforming integer PHIs. We don't want to pessimize + // the code by turning an i32 into an i1293. + if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { + if (!ShouldChangeType(PN.getType(), CastSrcTy)) + return 0; + } + } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { + // Can fold binop, compare or shift here if the RHS is a constant, + // otherwise call FoldPHIArgBinOpIntoPHI. + ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); + if (ConstantOp == 0) + return FoldPHIArgBinOpIntoPHI(PN); + + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); + } else { + return 0; // Cannot fold this operation. + } + + // Check to see if all arguments are the same operation. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) + return 0; + if (CastSrcTy) { + if (I->getOperand(0)->getType() != CastSrcTy) + return 0; // Cast operation must match. + } else if (I->getOperand(1) != ConstantOp) { + return 0; + } + + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); + } + + // Okay, they are all the same operation. Create a new PHI node of the + // correct type, and PHI together all of the LHS's of the instructions. + PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getNumIncomingValues(), + PN.getName()+".in"); + + Value *InVal = FirstInst->getOperand(0); + NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + + // Add all operands to the new PHI. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); + if (NewInVal != InVal) + InVal = 0; + NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + } + + Value *PhiVal; + if (InVal) { + // The new PHI unions all of the same values together. This is really + // common, so we handle it intelligently here for compile-time speed. + PhiVal = InVal; + delete NewPN; + } else { + InsertNewInstBefore(NewPN, PN); + PhiVal = NewPN; + } + + // Insert and return the new operation. + if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) { + CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal, + PN.getType()); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; + } + + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { + BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (isNUW) BinOp->setHasNoUnsignedWrap(); + if (isNSW) BinOp->setHasNoSignedWrap(); + if (isExact) BinOp->setIsExact(); + BinOp->setDebugLoc(FirstInst->getDebugLoc()); + return BinOp; + } + + CmpInst *CIOp = cast<CmpInst>(FirstInst); + CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + PhiVal, ConstantOp); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; +} + +/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle +/// that is dead. +static bool DeadPHICycle(PHINode *PN, + SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) { + if (PN->use_empty()) return true; + if (!PN->hasOneUse()) return false; + + // Remember this node, and if we find the cycle, return. + if (!PotentiallyDeadPHIs.insert(PN)) + return true; + + // Don't scan crazily complex things. + if (PotentiallyDeadPHIs.size() == 16) + return false; + + if (PHINode *PU = dyn_cast<PHINode>(PN->use_back())) + return DeadPHICycle(PU, PotentiallyDeadPHIs); + + return false; +} + +/// PHIsEqualValue - Return true if this phi node is always equal to +/// NonPhiInVal. This happens with mutually cyclic phi nodes like: +/// z = some value; x = phi (y, z); y = phi (x, z) +static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, + SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { + // See if we already saw this PHI node. + if (!ValueEqualPHIs.insert(PN)) + return true; + + // Don't scan crazily complex things. + if (ValueEqualPHIs.size() == 16) + return false; + + // Scan the operands to see if they are either phi nodes or are equal to + // the value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Op = PN->getIncomingValue(i); + if (PHINode *OpPN = dyn_cast<PHINode>(Op)) { + if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) + return false; + } else if (Op != NonPhiInVal) + return false; + } + + return true; +} + + +namespace { +struct PHIUsageRecord { + unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) + unsigned Shift; // The amount shifted. + Instruction *Inst; // The trunc instruction. + + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) + : PHIId(pn), Shift(Sh), Inst(User) {} + + bool operator<(const PHIUsageRecord &RHS) const { + if (PHIId < RHS.PHIId) return true; + if (PHIId > RHS.PHIId) return false; + if (Shift < RHS.Shift) return true; + if (Shift > RHS.Shift) return false; + return Inst->getType()->getPrimitiveSizeInBits() < + RHS.Inst->getType()->getPrimitiveSizeInBits(); + } +}; + +struct LoweredPHIRecord { + PHINode *PN; // The PHI that was lowered. + unsigned Shift; // The amount shifted. + unsigned Width; // The width extracted. + + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) + : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} + + // Ctor form used by DenseMap. + LoweredPHIRecord(PHINode *pn, unsigned Sh) + : PN(pn), Shift(Sh), Width(0) {} +}; +} + +namespace llvm { + template<> + struct DenseMapInfo<LoweredPHIRecord> { + static inline LoweredPHIRecord getEmptyKey() { + return LoweredPHIRecord(0, 0); + } + static inline LoweredPHIRecord getTombstoneKey() { + return LoweredPHIRecord(0, 1); + } + static unsigned getHashValue(const LoweredPHIRecord &Val) { + return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ + (Val.Width>>3); + } + static bool isEqual(const LoweredPHIRecord &LHS, + const LoweredPHIRecord &RHS) { + return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && + LHS.Width == RHS.Width; + } + }; +} + + +/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an +/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If +/// so, we split the PHI into the various pieces being extracted. This sort of +/// thing is introduced when SROA promotes an aggregate to large integer values. +/// +/// TODO: The user of the trunc may be an bitcast to float/double/vector or an +/// inttoptr. We should produce new PHIs in the right type. +/// +Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { + // PHIUsers - Keep track of all of the truncated values extracted from a set + // of PHIs, along with their offset. These are the things we want to rewrite. + SmallVector<PHIUsageRecord, 16> PHIUsers; + + // PHIs are often mutually cyclic, so we keep track of a whole set of PHI + // nodes which are extracted from. PHIsToSlice is a set we use to avoid + // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to + // check the uses of (to ensure they are all extracts). + SmallVector<PHINode*, 8> PHIsToSlice; + SmallPtrSet<PHINode*, 8> PHIsInspected; + + PHIsToSlice.push_back(&FirstPhi); + PHIsInspected.insert(&FirstPhi); + + for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { + PHINode *PN = PHIsToSlice[PHIId]; + + // Scan the input list of the PHI. If any input is an invoke, and if the + // input is defined in the predecessor, then we won't be split the critical + // edge which is required to insert a truncate. Because of this, we have to + // bail out. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); + if (II == 0) continue; + if (II->getParent() != PN->getIncomingBlock(i)) + continue; + + // If we have a phi, and if it's directly in the predecessor, then we have + // a critical edge where we need to put the truncate. Since we can't + // split the edge in instcombine, we have to bail out. + return 0; + } + + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // If the user is a PHI, inspect its uses recursively. + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (PHIsInspected.insert(UserPN)) + PHIsToSlice.push_back(UserPN); + continue; + } + + // Truncates are always ok. + if (isa<TruncInst>(User)) { + PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); + continue; + } + + // Otherwise it must be a lshr which can only be used by one trunc. + if (User->getOpcode() != Instruction::LShr || + !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || + !isa<ConstantInt>(User->getOperand(1))) + return 0; + + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); + PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); + } + } + + // If we have no users, they must be all self uses, just nuke the PHI. + if (PHIUsers.empty()) + return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); + + // If this phi node is transformable, create new PHIs for all the pieces + // extracted out of it. First, sort the users by their offset and size. + array_pod_sort(PHIUsers.begin(), PHIUsers.end()); + + DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n'; + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n'; + ); + + // PredValues - This is a temporary used when rewriting PHI nodes. It is + // hoisted out here to avoid construction/destruction thrashing. + DenseMap<BasicBlock*, Value*> PredValues; + + // ExtractedVals - Each new PHI we introduce is saved here so we don't + // introduce redundant PHIs. + DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; + + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { + unsigned PHIId = PHIUsers[UserI].PHIId; + PHINode *PN = PHIsToSlice[PHIId]; + unsigned Offset = PHIUsers[UserI].Shift; + Type *Ty = PHIUsers[UserI].Inst->getType(); + + PHINode *EltPHI; + + // If we've already lowered a user like this, reuse the previously lowered + // value. + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + + // Otherwise, Create the new PHI node for this user. + EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), + PN->getName()+".off"+Twine(Offset), PN); + assert(EltPHI->getType() != PN->getType() && + "Truncate didn't shrink phi?"); + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + Value *&PredVal = PredValues[Pred]; + + // If we already have a value for this predecessor, reuse it. + if (PredVal) { + EltPHI->addIncoming(PredVal, Pred); + continue; + } + + // Handle the PHI self-reuse case. + Value *InVal = PN->getIncomingValue(i); + if (InVal == PN) { + PredVal = EltPHI; + EltPHI->addIncoming(PredVal, Pred); + continue; + } + + if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { + // If the incoming value was a PHI, and if it was one of the PHIs we + // already rewrote it, just use the lowered value. + if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { + PredVal = Res; + EltPHI->addIncoming(PredVal, Pred); + continue; + } + } + + // Otherwise, do an extract in the predecessor. + Builder->SetInsertPoint(Pred, Pred->getTerminator()); + Value *Res = InVal; + if (Offset) + Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), + Offset), "extract"); + Res = Builder->CreateTrunc(Res, Ty, "extract.t"); + PredVal = Res; + EltPHI->addIncoming(Res, Pred); + + // If the incoming value was a PHI, and if it was one of the PHIs we are + // rewriting, we will ultimately delete the code we inserted. This + // means we need to revisit that PHI to make sure we extract out the + // needed piece. + if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) + if (PHIsInspected.count(OldInVal)) { + unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), + OldInVal)-PHIsToSlice.begin(); + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + cast<Instruction>(Res))); + ++UserE; + } + } + PredValues.clear(); + + DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": " + << *EltPHI << '\n'); + ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; + } + + // Replace the use of this piece with the PHI node. + ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); + } + + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) + // with undefs. + Value *Undef = UndefValue::get(FirstPhi.getType()); + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + ReplaceInstUsesWith(*PHIsToSlice[i], Undef); + return ReplaceInstUsesWith(FirstPhi, Undef); +} + +// PHINode simplification +// +Instruction *InstCombiner::visitPHINode(PHINode &PN) { + if (Value *V = SimplifyInstruction(&PN, TD, TLI)) + return ReplaceInstUsesWith(PN, V); + + // If all PHI operands are the same operation, pull them through the PHI, + // reducing code size. + if (isa<Instruction>(PN.getIncomingValue(0)) && + isa<Instruction>(PN.getIncomingValue(1)) && + cast<Instruction>(PN.getIncomingValue(0))->getOpcode() == + cast<Instruction>(PN.getIncomingValue(1))->getOpcode() && + // FIXME: The hasOneUse check will fail for PHIs that use the value more + // than themselves more than once. + PN.getIncomingValue(0)->hasOneUse()) + if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) + return Result; + + // If this is a trivial cycle in the PHI node graph, remove it. Basically, if + // this PHI only has a single use (a PHI), and if that PHI only has one use (a + // PHI)... break the cycle. + if (PN.hasOneUse()) { + Instruction *PHIUser = cast<Instruction>(PN.use_back()); + if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { + SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; + PotentiallyDeadPHIs.insert(&PN); + if (DeadPHICycle(PU, PotentiallyDeadPHIs)) + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); + } + + // If this phi has a single use, and if that use just computes a value for + // the next iteration of a loop, delete the phi. This occurs with unused + // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this + // common case here is good because the only other things that catch this + // are induction variable analysis (sometimes) and ADCE, which is only run + // late. + if (PHIUser->hasOneUse() && + (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && + PHIUser->use_back() == &PN) { + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); + } + } + + // We sometimes end up with phi cycles that non-obviously end up being the + // same value, for example: + // z = some value; x = phi (y, z); y = phi (x, z) + // where the phi nodes don't necessarily need to be in the same block. Do a + // quick check to see if the PHI node only contains a single non-phi value, if + // so, scan to see if the phi cycle is actually equal to that value. + { + unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); + // Scan for the first non-phi operand. + while (InValNo != NumIncomingVals && + isa<PHINode>(PN.getIncomingValue(InValNo))) + ++InValNo; + + if (InValNo != NumIncomingVals) { + Value *NonPhiInVal = PN.getIncomingValue(InValNo); + + // Scan the rest of the operands to see if there are any conflicts, if so + // there is no need to recursively scan other phis. + for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { + Value *OpVal = PN.getIncomingValue(InValNo); + if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) + break; + } + + // If we scanned over all operands, then we have one unique value plus + // phi values. Scan PHI nodes to see if they all merge in each other or + // the value. + if (InValNo == NumIncomingVals) { + SmallPtrSet<PHINode*, 16> ValueEqualPHIs; + if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) + return ReplaceInstUsesWith(PN, NonPhiInVal); + } + } + } + + // If there are multiple PHIs, sort their operands so that they all list + // the blocks in the same order. This will help identical PHIs be eliminated + // by other passes. Other passes shouldn't depend on this for correctness + // however. + PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); + if (&PN != FirstPN) + for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BBA = PN.getIncomingBlock(i); + BasicBlock *BBB = FirstPN->getIncomingBlock(i); + if (BBA != BBB) { + Value *VA = PN.getIncomingValue(i); + unsigned j = PN.getBasicBlockIndex(BBB); + Value *VB = PN.getIncomingValue(j); + PN.setIncomingBlock(i, BBB); + PN.setIncomingValue(i, VB); + PN.setIncomingBlock(j, BBA); + PN.setIncomingValue(j, VA); + // NOTE: Instcombine normally would want us to "return &PN" if we + // modified any of the operands of an instruction. However, since we + // aren't adding or removing uses (just rearranging them) we don't do + // this in this case. + } + } + + // If this is an integer PHI and we know that it has an illegal type, see if + // it is only used by trunc or trunc(lshr) operations. If so, we split the + // PHI into the various pieces being extracted. This sort of thing is + // introduced when SROA promotes an aggregate to a single large integer type. + if (PN.getType()->isIntegerTy() && TD && + !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) + if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) + return Res; + + return 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp new file mode 100644 index 000000000000..283bec2881fa --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -0,0 +1,998 @@ +//===- InstCombineSelect.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitSelect function. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, +/// returning the kind and providing the out parameter results if we +/// successfully match. +static SelectPatternFlavor +MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (SI == 0) return SPF_UNKNOWN; + + ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); + if (ICI == 0) return SPF_UNKNOWN; + + LHS = ICI->getOperand(0); + RHS = ICI->getOperand(1); + + // (icmp X, Y) ? X : Y + if (SI->getTrueValue() == ICI->getOperand(0) && + SI->getFalseValue() == ICI->getOperand(1)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMAX; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMAX; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMIN; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMIN; + } + } + + // (icmp X, Y) ? Y : X + if (SI->getTrueValue() == ICI->getOperand(1) && + SI->getFalseValue() == ICI->getOperand(0)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMAX; + } + } + + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) + + return SPF_UNKNOWN; +} + + +/// GetSelectFoldableOperands - We want to turn code that looks like this: +/// %C = or %A, %B +/// %D = select %cond, %C, %A +/// into: +/// %C = select %cond, %B, 0 +/// %D = or %A, %C +/// +/// Assuming that the specified instruction is an operand to the select, return +/// a bitmask indicating which operands of this instruction are foldable if they +/// equal the other incoming value of the select. +/// +static unsigned GetSelectFoldableOperands(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return 3; // Can fold through either operand. + case Instruction::Sub: // Can only fold on the amount subtracted. + case Instruction::Shl: // Can only fold on the shift amount. + case Instruction::LShr: + case Instruction::AShr: + return 1; + default: + return 0; // Cannot fold + } +} + +/// GetSelectFoldableConstant - For the same transformation as the previous +/// function, return the identity constant that goes into the select. +static Constant *GetSelectFoldableConstant(Instruction *I) { + switch (I->getOpcode()) { + default: llvm_unreachable("This cannot happen!"); + case Instruction::Add: + case Instruction::Sub: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return Constant::getNullValue(I->getType()); + case Instruction::And: + return Constant::getAllOnesValue(I->getType()); + case Instruction::Mul: + return ConstantInt::get(I->getType(), 1); + } +} + +/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI +/// have the same opcode and only one use each. Try to simplify this. +Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI) { + if (TI->getNumOperands() == 1) { + // If this is a non-volatile load or a cast from the same type, + // merge. + if (TI->isCast()) { + Type *FIOpndTy = FI->getOperand(0)->getType(); + if (TI->getOperand(0)->getType() != FIOpndTy) + return 0; + // The select condition may be a vector. We may only change the operand + // type if the vector width remains the same (and matches the condition). + Type *CondTy = SI.getCondition()->getType(); + if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || + CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) + return 0; + } else { + return 0; // unknown unary op. + } + + // Fold this by inserting a select from the input values. + Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName()+".v"); + return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, + TI->getType()); + } + + // Only handle binary operators here. + if (!isa<BinaryOperator>(TI)) + return 0; + + // Figure out if the operations have any operands in common. + Value *MatchOp, *OtherOpT, *OtherOpF; + bool MatchIsOpZero; + if (TI->getOperand(0) == FI->getOperand(0)) { + MatchOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(1)) { + MatchOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = false; + } else if (!TI->isCommutative()) { + return 0; + } else if (TI->getOperand(0) == FI->getOperand(1)) { + MatchOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(0)) { + MatchOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else { + return 0; + } + + // If we reach here, they do have operations in common. + Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, + OtherOpF, SI.getName()+".v"); + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) { + if (MatchIsOpZero) + return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI); + else + return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); + } + llvm_unreachable("Shouldn't get here"); +} + +static bool isSelect01(Constant *C1, Constant *C2) { + ConstantInt *C1I = dyn_cast<ConstantInt>(C1); + if (!C1I) + return false; + ConstantInt *C2I = dyn_cast<ConstantInt>(C2); + if (!C2I) + return false; + if (!C1I->isZero() && !C2I->isZero()) // One side must be zero. + return false; + return C1I->isOne() || C1I->isAllOnesValue() || + C2I->isOne() || C2I->isAllOnesValue(); +} + +/// FoldSelectIntoOp - Try fold the select into one of the operands to +/// facilitate further optimization. +Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, + Value *FalseVal) { + // See the comment above GetSelectFoldableOperands for a description of the + // transformation we are doing here. + if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) { + if (TVI->hasOneUse() && TVI->getNumOperands() == 2 && + !isa<Constant>(FalseVal)) { + if (unsigned SFO = GetSelectFoldableOperands(TVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { + OpToFold = 1; + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + OpToFold = 2; + } + + if (OpToFold) { + Constant *C = GetSelectFoldableConstant(TVI); + Value *OOp = TVI->getOperand(2-OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0, 1 and -1. + if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { + Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C); + NewSel->takeName(TVI); + BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); + BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), + FalseVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(TVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); + } + return BO; + } + } + } + } + } + + if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) { + if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && + !isa<Constant>(TrueVal)) { + if (unsigned SFO = GetSelectFoldableOperands(FVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { + OpToFold = 1; + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + OpToFold = 2; + } + + if (OpToFold) { + Constant *C = GetSelectFoldableConstant(FVI); + Value *OOp = FVI->getOperand(2-OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0, 1 and -1. + if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { + Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp); + NewSel->takeName(FVI); + BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); + BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), + TrueVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(FVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); + } + return BO; + } + } + } + } + } + + return 0; +} + +/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is +/// replaced with RepOp. +static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + // Trivial replacement. + if (V == Op) + return RepOp; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return 0; + + // If this is a binary operator, try to simplify it with the replaced op. + if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { + if (B->getOperand(0) == Op) + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI); + if (B->getOperand(1) == Op) + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI); + } + + // Same for CmpInsts. + if (CmpInst *C = dyn_cast<CmpInst>(I)) { + if (C->getOperand(0) == Op) + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD, + TLI); + if (C->getOperand(1) == Op) + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD, + TLI); + } + + // TODO: We could hand off more cases to instsimplify here. + + // If all operands are constant after substituting Op for RepOp then we can + // constant fold the instruction. + if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { + // Build a list of all constant operands. + SmallVector<Constant*, 8> ConstOps; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (I->getOperand(i) == Op) + ConstOps.push_back(CRepOp); + else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) + ConstOps.push_back(COp); + else + break; + } + + // All operands were constants, fold it. + if (ConstOps.size() == I->getNumOperands()) { + if (CmpInst *C = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], + ConstOps[1], TD, TLI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + ConstOps, TD, TLI); + } + } + + return 0; +} + +/// foldSelectICmpAndOr - We want to turn: +/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) +/// into: +/// (or (shl (and X, C1), C3), y) +/// iff: +/// C1 and C2 are both powers of 2 +/// where: +/// C3 = Log(C2) - Log(C1) +/// +/// This transform handles cases where: +/// 1. The icmp predicate is inverted +/// 2. The select operands are reversed +/// 3. The magnitude of C2 and C1 are flipped +static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, + Value *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) + return 0; + + Value *CmpLHS = IC->getOperand(0); + Value *CmpRHS = IC->getOperand(1); + + if (!match(CmpRHS, m_Zero())) + return 0; + + Value *X; + const APInt *C1; + if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) + return 0; + + const APInt *C2; + bool OrOnTrueVal = false; + bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); + if (!OrOnFalseVal) + OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); + + if (!OrOnFalseVal && !OrOnTrueVal) + return 0; + + Value *V = CmpLHS; + Value *Y = OrOnFalseVal ? TrueVal : FalseVal; + + unsigned C1Log = C1->logBase2(); + unsigned C2Log = C2->logBase2(); + if (C2Log > C1Log) { + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder->CreateShl(V, C2Log - C1Log); + } else if (C1Log > C2Log) { + V = Builder->CreateLShr(V, C1Log - C2Log); + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + } else + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + + ICmpInst::Predicate Pred = IC->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) || + (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal)) + V = Builder->CreateXor(V, *C2); + + return Builder->CreateOr(V, Y); +} + +/// visitSelectInstWithICmp - Visit a SelectInst that has an +/// ICmpInst as its first operand. +/// +Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, + ICmpInst *ICI) { + bool Changed = false; + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + // Check cases where the comparison is with a constant that + // can be adjusted to fit the min/max idiom. We may move or edit ICI + // here, so make sure the select is the only user. + if (ICI->hasOneUse()) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { + // X < MIN ? T : F --> F + if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) + && CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X > MAX ? T : F --> F + else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT) + && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) + return ReplaceInstUsesWith(SI, FalseVal); + switch (Pred) { + default: break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: { + // These transformations only work for selects over integers. + IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); + if (!SelectTy) + break; + + Constant *AdjustedRHS; + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1); + else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1); + + // X > C ? X : C+1 --> X < C+1 ? C+1 : X + // X < C ? X : C-1 --> X > C-1 ? C-1 : X + if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) + ; // Nothing to do here. Values match without any sign/zero extension. + + // Types do not match. Instead of calculating this with mixed types + // promote all to the larger type. This enables scalar evolution to + // analyze this expression. + else if (CmpRHS->getType()->getScalarSizeInBits() + < SelectTy->getBitWidth()) { + Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy); + + // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X + // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X + // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X + // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X + if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = sextRHS; + } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = sextRHS; + } else if (ICI->isUnsigned()) { + Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy); + // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X + // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X + // zext + signed compare cannot be changed: + // 0xff <s 0x00, but 0x00ff >s 0x0000 + if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = zextRHS; + } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = zextRHS; + } else + break; + } else + break; + } else + break; + + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(0, CmpLHS); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + + // Move ICI instruction right before the select instruction. Otherwise + // the sext/zext value may be defined after the ICI instruction uses it. + ICI->moveBefore(&SI); + + Changed = true; + break; + } + } + } + + // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 + // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1 + // FIXME: Type and constness constraints could be lifted, but we have to + // watch code size carefully. We should consider xor instead of + // sub/add when we decide to do that. + if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (TrueVal->getType() == Ty) { + if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { + ConstantInt *C1 = NULL, *C2 = NULL; + if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { + C1 = dyn_cast<ConstantInt>(TrueVal); + C2 = dyn_cast<ConstantInt>(FalseVal); + } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) { + C1 = dyn_cast<ConstantInt>(FalseVal); + C2 = dyn_cast<ConstantInt>(TrueVal); + } + if (C1 && C2) { + // This shift results in either -1 or 0. + Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1); + + // Check if we can express the operation with a single or. + if (C2->isAllOnesValue()) + return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1)); + + Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue()); + return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1)); + } + } + } + } + + // If we have an equality comparison then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. + if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) + return ReplaceInstUsesWith(SI, FalseVal); + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) + return ReplaceInstUsesWith(SI, FalseVal); + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) + return ReplaceInstUsesWith(SI, TrueVal); + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) + return ReplaceInstUsesWith(SI, TrueVal); + } + + // NOTE: if we wanted to, this is where to detect integer MIN/MAX + + if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) { + if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { + // Transform (X == C) ? X : Y -> (X == C) ? C : Y + SI.setOperand(1, CmpRHS); + Changed = true; + } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { + // Transform (X != C) ? Y : X -> (X != C) ? Y : C + SI.setOperand(2, CmpRHS); + Changed = true; + } + } + + if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) + return ReplaceInstUsesWith(SI, V); + + return Changed ? &SI : 0; +} + + +/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a +/// PHI node (but the two may be in different blocks). See if the true/false +/// values (V) are live in all of the predecessor blocks of the PHI. For +/// example, cases like this cannot be mapped: +/// +/// X = phi [ C1, BB1], [C2, BB2] +/// Y = add +/// Z = select X, Y, 0 +/// +/// because Y is not live in BB1/BB2. +/// +static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, + const SelectInst &SI) { + // If the value is a non-instruction value like a constant or argument, it + // can always be mapped. + const Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return true; + + // If V is a PHI node defined in the same block as the condition PHI, we can + // map the arguments. + const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); + + if (const PHINode *VP = dyn_cast<PHINode>(I)) + if (VP->getParent() == CondPHI->getParent()) + return true; + + // Otherwise, if the PHI and select are defined in the same block and if V is + // defined in a different block, then we can transform it. + if (SI.getParent() == CondPHI->getParent() && + I->getParent() != CondPHI->getParent()) + return true; + + // Otherwise we have a 'hard' case and we can't tell without doing more + // detailed dominator based analysis, punt. + return false; +} + +/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: +/// SPF2(SPF1(A, B), C) +Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, + SelectPatternFlavor SPF1, + Value *A, Value *B, + Instruction &Outer, + SelectPatternFlavor SPF2, Value *C) { + if (C == A || C == B) { + // MAX(MAX(A, B), B) -> MAX(A, B) + // MIN(MIN(a, b), a) -> MIN(a, b) + if (SPF1 == SPF2) + return ReplaceInstUsesWith(Outer, Inner); + + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || + (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || + (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || + (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) + return ReplaceInstUsesWith(Outer, C); + } + + // TODO: MIN(MIN(A, 23), 97) + return 0; +} + + +/// foldSelectICmpAnd - If one of the constants is zero (we know they can't +/// both be) and we have an icmp instruction with zero, and we have an 'and' +/// with the non-constant value and a power of two we can turn the select +/// into a shift on the result of the 'and'. +static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, + ConstantInt *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) + return 0; + + if (!match(IC->getOperand(1), m_Zero())) + return 0; + + ConstantInt *AndRHS; + Value *LHS = IC->getOperand(0); + if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) + return 0; + + // If both select arms are non-zero see if we have a select of the form + // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic + // for 'x ? 2^n : 0' and fix the thing up at the end. + ConstantInt *Offset = 0; + if (!TrueVal->isZero() && !FalseVal->isZero()) { + if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + Offset = FalseVal; + else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + Offset = TrueVal; + else + return 0; + + // Adjust TrueVal and FalseVal to the offset. + TrueVal = ConstantInt::get(Builder->getContext(), + TrueVal->getValue() - Offset->getValue()); + FalseVal = ConstantInt::get(Builder->getContext(), + FalseVal->getValue() - Offset->getValue()); + } + + // Make sure the mask in the 'and' and one of the select arms is a power of 2. + if (!AndRHS->getValue().isPowerOf2() || + (!TrueVal->getValue().isPowerOf2() && + !FalseVal->getValue().isPowerOf2())) + return 0; + + // Determine which shift is needed to transform result of the 'and' into the + // desired result. + ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal; + unsigned ValZeros = ValC->getValue().logBase2(); + unsigned AndZeros = AndRHS->getValue().logBase2(); + + // If types don't match we can still convert the select by introducing a zext + // or a trunc of the 'and'. The trunc case requires that all of the truncated + // bits are zero, we can figure that out by looking at the 'and' mask. + if (AndZeros >= ValC->getBitWidth()) + return 0; + + Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); + if (ValZeros > AndZeros) + V = Builder->CreateShl(V, ValZeros - AndZeros); + else if (ValZeros < AndZeros) + V = Builder->CreateLShr(V, AndZeros - ValZeros); + + // Okay, now we know that everything is set up, we just don't know whether we + // have a icmp_ne or icmp_eq and whether the true or false val is the zero. + bool ShouldNotVal = !TrueVal->isZero(); + ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; + if (ShouldNotVal) + V = Builder->CreateXor(V, ValC); + + // Apply an offset if needed. + if (Offset) + V = Builder->CreateAdd(V, Offset); + return V; +} + +Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD)) + return ReplaceInstUsesWith(SI, V); + + if (SI.getType()->isIntegerTy(1)) { + if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { + if (C->getZExtValue()) { + // Change: A = select B, true, C --> A = or B, C + return BinaryOperator::CreateOr(CondVal, FalseVal); + } + // Change: A = select B, false, C --> A = and !B, C + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return BinaryOperator::CreateAnd(NotCond, FalseVal); + } + if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { + if (C->getZExtValue() == false) { + // Change: A = select B, C, false --> A = and B, C + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } + // Change: A = select B, C, true --> A = or !B, C + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return BinaryOperator::CreateOr(NotCond, TrueVal); + } + + // select a, b, a -> a&b + // select a, a, b -> a|b + if (CondVal == TrueVal) + return BinaryOperator::CreateOr(CondVal, FalseVal); + if (CondVal == FalseVal) + return BinaryOperator::CreateAnd(CondVal, TrueVal); + + // select a, ~a, b -> (~a)&b + // select a, b, ~a -> (~a)|b + if (match(TrueVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateAnd(TrueVal, FalseVal); + if (match(FalseVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateOr(TrueVal, FalseVal); + } + + // Selecting between two integer constants? + if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) + if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) { + // select C, 1, 0 -> zext C to int + if (FalseValC->isZero() && TrueValC->getValue() == 1) + return new ZExtInst(CondVal, SI.getType()); + + // select C, -1, 0 -> sext C to int + if (FalseValC->isZero() && TrueValC->isAllOnesValue()) + return new SExtInst(CondVal, SI.getType()); + + // select C, 0, 1 -> zext !C to int + if (TrueValC->isZero() && FalseValC->getValue() == 1) { + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return new ZExtInst(NotCond, SI.getType()); + } + + // select C, 0, -1 -> sext !C to int + if (TrueValC->isZero() && FalseValC->isAllOnesValue()) { + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return new SExtInst(NotCond, SI.getType()); + } + + if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + return ReplaceInstUsesWith(SI, V); + } + + // See if we are selecting two values based on a comparison of the two values. + if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) { + if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { + // Transform (X == Y) ? X : Y -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } + // Transform (X une Y) ? X : Y -> X + if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, TrueVal); + } + // NOTE: if we wanted to, this is where to detect MIN/MAX + + } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ + // Transform (X == Y) ? Y : X -> X + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } + // Transform (X une Y) ? Y : X -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, TrueVal); + } + // NOTE: if we wanted to, this is where to detect MIN/MAX + } + // NOTE: if we wanted to, this is where to detect ABS + } + + // See if we are selecting two values based on a comparison of the two values. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal)) + if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) + return Result; + + if (Instruction *TI = dyn_cast<Instruction>(TrueVal)) + if (Instruction *FI = dyn_cast<Instruction>(FalseVal)) + if (TI->hasOneUse() && FI->hasOneUse()) { + Instruction *AddOp = 0, *SubOp = 0; + + // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) + if (TI->getOpcode() == FI->getOpcode()) + if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) + return IV; + + // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is + // even legal for FP. + if ((TI->getOpcode() == Instruction::Sub && + FI->getOpcode() == Instruction::Add) || + (TI->getOpcode() == Instruction::FSub && + FI->getOpcode() == Instruction::FAdd)) { + AddOp = FI; SubOp = TI; + } else if ((FI->getOpcode() == Instruction::Sub && + TI->getOpcode() == Instruction::Add) || + (FI->getOpcode() == Instruction::FSub && + TI->getOpcode() == Instruction::FAdd)) { + AddOp = TI; SubOp = FI; + } + + if (AddOp) { + Value *OtherAddOp = 0; + if (SubOp->getOperand(0) == AddOp->getOperand(0)) { + OtherAddOp = AddOp->getOperand(1); + } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { + OtherAddOp = AddOp->getOperand(0); + } + + if (OtherAddOp) { + // So at this point we know we have (Y -> OtherAddOp): + // select C, (add X, Y), (sub X, Z) + Value *NegVal; // Compute -Z + if (SI.getType()->isFPOrFPVectorTy()) { + NegVal = Builder->CreateFNeg(SubOp->getOperand(1)); + } else { + NegVal = Builder->CreateNeg(SubOp->getOperand(1)); + } + + Value *NewTrueOp = OtherAddOp; + Value *NewFalseOp = NegVal; + if (AddOp != TI) + std::swap(NewTrueOp, NewFalseOp); + Value *NewSel = + Builder->CreateSelect(CondVal, NewTrueOp, + NewFalseOp, SI.getName() + ".p"); + + if (SI.getType()->isFPOrFPVectorTy()) + return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); + else + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + } + } + } + + // See if we can fold the select into one of our operands. + if (SI.getType()->isIntegerTy()) { + if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) + return FoldI; + + // MAX(MAX(a, b), a) -> MAX(a, b) + // MIN(MIN(a, b), a) -> MIN(a, b) + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + Value *LHS, *RHS, *LHS2, *RHS2; + if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { + if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, + SI, SPF, RHS)) + return R; + if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2, + SI, SPF, LHS)) + return R; + } + + // TODO. + // ABS(-X) -> ABS(X) + // ABS(ABS(X)) -> ABS(X) + } + + // See if we can fold the select into a phi node if the condition is a select. + if (isa<PHINode>(SI.getCondition())) + // The true/false values have to be live in the PHI predecessor's blocks. + if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && + CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; + + if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { + if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; + SI.setOperand(1, TrueSI->getTrueValue()); + return &SI; + } + } + if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { + if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; + SI.setOperand(2, FalseSI->getFalseValue()); + return &SI; + } + } + + if (BinaryOperator::isNot(CondVal)) { + SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + return &SI; + } + + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + + if (isa<ConstantAggregateZero>(CondVal)) { + return ReplaceInstUsesWith(SI, FalseVal); + } + } + + return 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp new file mode 100644 index 000000000000..8cf76e5e8a9f --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -0,0 +1,810 @@ +//===- InstCombineShifts.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitShl, visitLShr, and visitAShr functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { + assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // See if we can fold away this shift. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // Try to fold constant and into select arguments. + if (isa<Constant>(Op0)) + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) + if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) + return Res; + + // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. + // Because shifts by negative values (which could occur if A were negative) + // are undefined. + Value *A; const APInt *B; + if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { + // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't + // demand the sign bit (and many others) here?? + Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), + Op1->getName()); + I.setOperand(1, Rem); + return &I; + } + + return 0; +} + +/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// logically to the left or right by some number of bits. This should return +/// true if the expression can be computed for the same cost as the current +/// expression tree. This is used to eliminate extraneous shifting from things +/// like: +/// %C = shl i128 %A, 64 +/// %D = shl i128 %B, 96 +/// %E = or i128 %C, %D +/// %F = lshr i128 %E, 64 +/// where the client will ask if E can be computed shifted right by 64-bits. If +/// this succeeds, the GetShiftedValue function will be called to produce the +/// value. +static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If this is the opposite shift, we can directly reuse the input of the shift + // if the needed bits are already zero in the input. This allows us to reuse + // the value which means that we don't care if the shift has multiple uses. + // TODO: Handle opposite shift by exact value. + ConstantInt *CI = 0; + if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || + (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { + if (CI->getZExtValue() == NumBits) { + // TODO: Check that the input bits are already zero with MaskedValueIsZero +#if 0 + // If this is a truncate of a logical shr, we can truncate it to a smaller + // lshr iff we know that the bits we would otherwise be shifting in are + // already zeros. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateTruncated(I->getOperand(0), Ty); + } +#endif + + } + } + + // We can't mutate something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + switch (I->getOpcode()) { + default: return false; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. + return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) && + CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC); + + case Instruction::Shl: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) return true; + + // We can always turn shl(c)+shr(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned LowBits = TypeWidth - CI->getZExtValue(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) + return true; + } + + return false; + } + case Instruction::LShr: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) return true; + + // We can always turn lshr(c)+shl(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) { + unsigned LowBits = CI->getZExtValue() - NumBits; + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) + return true; + } + + return false; + } + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) && + CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC); + } + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC)) + return false; + return true; + } + } +} + +/// GetShiftedValue - When CanEvaluateShifted returned true for an expression, +/// this value inserts the new computation that produces the shifted value. +static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (Constant *C = dyn_cast<Constant>(V)) { + if (isLeftShift) + V = IC.Builder->CreateShl(C, NumBits); + else + V = IC.Builder->CreateLShr(C, NumBits); + // If we got a constantexpr back, try to simplify it with TD info. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + V = ConstantFoldConstantExpression(CE, IC.getDataLayout(), + IC.getTargetLibraryInfo()); + return V; + } + + Instruction *I = cast<Instruction>(V); + IC.Worklist.Add(I); + + switch (I->getOpcode()) { + default: llvm_unreachable("Inconsistency with CanEvaluateShifted"); + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. + I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + return I; + + case Instruction::Shl: { + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); + + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) { + // If this is oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(I->getType()); + + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return I; + } + + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(BO->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(BO); + VI->takeName(BO); + } + return V; + } + + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return BO; + } + case Instruction::LShr: { + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) { + // If this is oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(BO->getType()); + + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setIsExact(false); + return I; + } + + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(I->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(I); + VI->takeName(I); + } + return V; + } + + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setIsExact(false); + return BO; + } + + case Instruction::Select: + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); + return I; + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), + NumBits, isLeftShift, IC)); + return PN; + } + } +} + + + +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I) { + bool isLeftShift = I.getOpcode() == Instruction::Shl; + + + // See if we can propagate this shift into the input, this covers the trivial + // cast of lshr(shl(x,c1),c2) as well as other more complex cases. + if (I.getOpcode() != Instruction::AShr && + CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" + " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); + + return ReplaceInstUsesWith(I, + GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + } + + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); + + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate + // a signed shift. + // + if (Op1->uge(TypeBits)) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); + // ashr i32 X, 32 --> ashr i32 X, 31 + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); + return &I; + } + + // ((X*C1) << C2) == (X * (C1 << C2)) + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) + if (BO->getOpcode() == Instruction::Mul && isLeftShift) + if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) + return BinaryOperator::CreateMul(BO->getOperand(0), + ConstantExpr::getShl(BOOp, Op1)); + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) + if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { + Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); + // If 'shift2' is an ashr, we would have to get the sign bit into a funny + // place. Don't try to do this transformation in this case. Also, we + // require that the input operand is a shift-by-constant so that we have + // confidence that the shifts will get folded together. We could do this + // xform in more cases, but it is unlikely to be profitable. + if (TrOp && I.isLogicalShift() && TrOp->isShift() && + isa<ConstantInt>(TrOp->getOperand(1))) { + // Okay, we'll do this xform. Make the shift of shift. + Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + // (shift2 (shift1 & 0x00FF), c2) + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); + + // For logical shifts, the truncation has the effect of making the high + // part of the register be zeros. Emulate this by inserting an AND to + // clear the top bits as needed. This 'and' will usually be zapped by + // other xforms later if dead. + unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned DstSize = TI->getType()->getScalarSizeInBits(); + APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); + + // The mask we constructed says what the trunc would do if occurring + // between the shifts. We want to know the effect *after* the second + // shift. We know that it is a logical shift by a constant, so adjust the + // mask as appropriate. + if (I.getOpcode() == Instruction::Shl) + MaskV <<= Op1->getZExtValue(); + else { + assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); + MaskV = MaskV.lshr(Op1->getZExtValue()); + } + + // shift1 & 0x00FF + Value *And = Builder->CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); + + // Return the value truncated to the interesting size. + return new TruncInst(And, I.getType()); + } + } + + if (Op0->hasOneUse()) { + if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + Value *V1, *V2; + ConstantInt *CC; + switch (Op0BO->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // These operators commute. + // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) + if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && + match(Op0BO->getOperand(1), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + } + + // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) + Value *Op0BOOp1 = Op0BO->getOperand(1); + if (isLeftShift && Op0BOOp1->hasOneUse() && + match(Op0BOOp1, + m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_ConstantInt(CC)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); + return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); + } + } + + // FALL THROUGH. + case Instruction::Sub: { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && + match(Op0BO->getOperand(0), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + } + + // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) + if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && + match(Op0BO->getOperand(0), + m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), + m_ConstantInt(CC))) && V2 == Op1) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); + + return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); + } + + break; + } + } + + + // If the operand is an bitwise operator with a constant RHS, and the + // shift is the only use, we can pull it out of the shift. + if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { + bool isValid = true; // Valid only for And, Or, Xor + bool highBitSet = false; // Transform if high bit of constant set? + + switch (Op0BO->getOpcode()) { + default: isValid = false; break; // Do not perform transform! + case Instruction::Add: + isValid = isLeftShift; + break; + case Instruction::Or: + case Instruction::Xor: + highBitSet = false; + break; + case Instruction::And: + highBitSet = true; + break; + } + + // If this is a signed shift right, and the high bit is modified + // by the logical operation, do not perform the transformation. + // The highBitSet boolean indicates the value of the high bit of + // the constant which would cause it to be modified for this + // operation. + // + if (isValid && I.getOpcode() == Instruction::AShr) + isValid = Op0C->getValue()[TypeBits-1] == highBitSet; + + if (isValid) { + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); + + Value *NewShift = + Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + NewShift->takeName(Op0BO); + + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, + NewRHS); + } + } + } + } + + // Find out if this is a shift of a shift by a constant. + BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); + if (ShiftOp && !ShiftOp->isShift()) + ShiftOp = 0; + + if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { + + // This is a constant shift of a constant shift. Be careful about hiding + // shl instructions behind bit masks. They are used to represent multiplies + // by a constant, and it is important that simple arithmetic expressions + // are still recognizable by scalar evolution. + // + // The transforms applied to shl are very similar to the transforms applied + // to mul by constant. We can be more aggressive about optimizing right + // shifts. + // + // Combinations of right and left shifts will still be optimized in + // DAGCombine where scalar evolution no longer applies. + + ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); + uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); + if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + Value *X = ShiftOp->getOperand(0); + + IntegerType *Ty = cast<IntegerType>(I.getType()); + + // Check for (X << c1) << c2 and (X >> c1) >> c2 + if (I.getOpcode() == ShiftOp->getOpcode()) { + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. + // If this is oversized composite shift, then unsigned shifts get 0, ashr + // saturates. + if (AmtSum >= TypeBits) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. + } + + return BinaryOperator::Create(I.getOpcode(), X, + ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftAmt1 == ShiftAmt2) { + // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(), Mask)); + } + } else if (ShiftAmt1 < ShiftAmt2) { + uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; + + // (X >>?,exact C1) << C2 --> X << (C2-C1) + // The inexact version is deferred to DAGCombine so we don't hide shl + // behind a bit mask. + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl && + ShiftOp->isExact()) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + return NewShl; + } + + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + // (X <<nuw C1) >>u C2 --> X >>u (C2-C1) + if (ShiftOp->hasNoUnsignedWrap()) { + BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr, + X, ShiftDiffCst); + NewLShr->setIsExact(I.isExact()); + return NewLShr; + } + Value *Shift = Builder->CreateLShr(X, ShiftDiffCst); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, + // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. + if (I.getOpcode() == Instruction::AShr && + ShiftOp->getOpcode() == Instruction::Shl) { + if (ShiftOp->hasNoSignedWrap()) { + // (X <<nsw C1) >>s C2 --> X >>s (C2-C1) + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr, + X, ShiftDiffCst); + NewAShr->setIsExact(I.isExact()); + return NewAShr; + } + } + } else { + assert(ShiftAmt2 < ShiftAmt1); + uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; + + // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) + // The inexact version is deferred to DAGCombine so we don't hide shl + // behind a bit mask. + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl && + ShiftOp->isExact()) { + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), + X, ShiftDiffCst); + NewShr->setIsExact(true); + return NewShr; + } + + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->hasNoUnsignedWrap()) { + // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2) + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(true); + return NewShl; + } + Value *Shift = Builder->CreateShl(X, ShiftDiffCst); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, + // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. + if (I.getOpcode() == Instruction::AShr && + ShiftOp->getOpcode() == Instruction::Shl) { + if (ShiftOp->hasNoSignedWrap()) { + // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2) + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoSignedWrap(true); + return NewShl; + } + } + } + } + return 0; +} + +Instruction *InstCombiner::visitShl(BinaryOperator &I) { + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), + I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *V = commonShiftTransforms(I)) + return V; + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { + unsigned ShAmt = Op1C->getZExtValue(); + + // If the shifted-out value is known-zero, then this is a NUW shift. + if (!I.hasNoUnsignedWrap() && + MaskedValueIsZero(I.getOperand(0), + APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { + I.setHasNoUnsignedWrap(); + return &I; + } + + // If the shifted out value is all signbits, this is a NSW shift. + if (!I.hasNoSignedWrap() && + ComputeNumSignBits(I.getOperand(0)) > ShAmt) { + I.setHasNoSignedWrap(); + return &I; + } + } + + // (C1 << A) << C2 -> (C1 << C2) << A + Constant *C1, *C2; + Value *A; + if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) && + match(I.getOperand(1), m_Constant(C2))) + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + + return 0; +} + +Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) { + unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); + // ctlz.i32(x)>>5 --> zext(x == 0) + // cttz.i32(x)>>5 --> zext(x == 0) + // ctpop.i32(x)>>5 --> zext(x == -1) + if ((II->getIntrinsicID() == Intrinsic::ctlz || + II->getIntrinsicID() == Intrinsic::cttz || + II->getIntrinsicID() == Intrinsic::ctpop) && + isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) { + bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; + Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); + Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); + return new ZExtInst(Cmp, II->getType()); + } + } + + // If the shifted-out value is known-zero, then this is an exact shift. + if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } + } + + return 0; +} + +Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we + // have a sign-extend idiom. + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { + // If the left shift is just shifting out partial signbits, delete the + // extension. + if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) + return ReplaceInstUsesWith(I, X); + + // If the input is an extension from the shifted amount value, e.g. + // %x = zext i8 %A to i32 + // %y = shl i32 %x, 24 + // %z = ashr %y, 24 + // then turn this into "z = sext i8 A to i32". + if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) { + uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits(); + uint32_t DestBits = ZI->getType()->getScalarSizeInBits(); + if (Op1C->getZExtValue() == DestBits-SrcBits) + return new SExtInst(ZI->getOperand(0), ZI->getType()); + } + } + + // If the shifted-out value is known-zero, then this is an exact shift. + if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } + } + + // See if we can turn a signed shr into an unsigned shr. + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getScalarSizeInBits()))) + return BinaryOperator::CreateLShr(Op0, Op1); + + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return ReplaceInstUsesWith(I, Op0); + + return 0; +} + diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp new file mode 100644 index 000000000000..c831ddd3da77 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -0,0 +1,1285 @@ +//===- InstCombineSimplifyDemanded.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains logic for simplifying instructions based on information +// about how they are used. +// +//===----------------------------------------------------------------------===// + + +#include "InstCombine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. +static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, + APInt Demanded) { + assert(I && "No instruction?"); + assert(OpNo < I->getNumOperands() && "Operand index too large"); + + // If the operand is not a constant integer, nothing to do. + ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo)); + if (!OpC) return false; + + // If there are no bits set that aren't demanded, nothing to do. + Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + if ((~Demanded & OpC->getValue()) == 0) + return false; + + // This instruction is producing bits that are not demanded. Shrink the RHS. + Demanded &= OpC->getValue(); + I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); + return true; +} + + + +/// SimplifyDemandedInstructionBits - Inst is an integer instruction that +/// SimplifyDemandedBits knows about. See if the instruction has any +/// properties that allow us to simplify its operands. +bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { + unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); + + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, + KnownZero, KnownOne, 0); + if (V == 0) return false; + if (V == &Inst) return true; + ReplaceInstUsesWith(Inst, V); + return true; +} + +/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the +/// specified instruction operand if possible, updating it in place. It returns +/// true if it made any change and false otherwise. +bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) { + Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, + KnownZero, KnownOne, Depth); + if (NewVal == 0) return false; + U = NewVal; + return true; +} + + +/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler +/// value based on the demanded bits. When this function is called, it is known +/// that only the bits set in DemandedMask of the result of V are ever used +/// downstream. Consequently, depending on the mask and V, it may be possible +/// to replace V with a constant or one of its operands. In such cases, this +/// function does the replacement and returns true. In all other cases, it +/// returns false after analyzing the expression and setting KnownOne and known +/// to be one in the expression. KnownZero contains all the bits that are known +/// to be zero in the expression. These are provided to potentially allow the +/// caller (which might recursively be SimplifyDemandedBits itself) to simplify +/// the expression. KnownOne and KnownZero always follow the invariant that +/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that +/// the bits in KnownOne and KnownZero may only be accurate for those bits set +/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero +/// and KnownOne must all be the same. +/// +/// This returns null if it did not change anything and it permits no +/// simplification. This returns V itself if it did some simplification of V's +/// operands based on the information about what bits are demanded. This returns +/// some other non-null value if it found out that V is equal to another value +/// in the context where the specified bits are demanded, but not for all users. +Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) { + assert(V != 0 && "Null pointer of Value???"); + assert(Depth <= 6 && "Limit Search Depth"); + uint32_t BitWidth = DemandedMask.getBitWidth(); + Type *VTy = V->getType(); + assert((TD || !VTy->isPointerTy()) && + "SimplifyDemandedBits needs to know bit widths!"); + assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && + (!VTy->isIntOrIntVectorTy() || + VTy->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "Value *V, DemandedMask, KnownZero and KnownOne " + "must have same BitWidth"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue() & DemandedMask; + KnownZero = ~KnownOne & DemandedMask; + return 0; + } + if (isa<ConstantPointerNull>(V)) { + // We know all of the bits for a constant! + KnownOne.clearAllBits(); + KnownZero = DemandedMask; + return 0; + } + + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + if (DemandedMask == 0) { // Not demanding any bits from V. + if (isa<UndefValue>(V)) + return 0; + return UndefValue::get(VTy); + } + + if (Depth == 6) // Limit search depth. + return 0; + + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) { + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + return 0; // Only analyze instructions. + } + + // If there are multiple uses of this value and we aren't at the root, then + // we can't do any simplifications of the operands, because DemandedMask + // only reflects the bits demanded by *one* of the users. + if (Depth != 0 && !I->hasOneUse()) { + // Despite the fact that we can't simplify this instruction in all User's + // context, we can at least compute the knownzero/knownone bits, and we can + // do simplifications that apply to *just* the one user if we know that + // this instruction has a simpler value in that context. + if (I->getOpcode() == Instruction::And) { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + } else if (I->getOpcode() == Instruction::Or) { + // We can simplify (X|Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + // If either the LHS or the RHS are One, the result is One. + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + } else if (I->getOpcode() == Instruction::Xor) { + // We can simplify (X^Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); + } + + // Compute the KnownZero/KnownOne bits to simplify things downstream. + ComputeMaskedBits(I, KnownZero, KnownOne, Depth); + return 0; + } + + // If this is the root being simplified, allow it to have multiple uses, + // just set the DemandedMask to all bits so that we can try to simplify the + // operands. This allows visitTruncInst (for example) to simplify the + // operand of a trunc without duplicating all the logic below. + if (Depth == 0 && !V->hasOneUse()) + DemandedMask = APInt::getAllOnesValue(BitWidth); + + switch (I->getOpcode()) { + default: + ComputeMaskedBits(I, KnownZero, KnownOne, Depth); + break; + case Instruction::And: + // If either the LHS or the RHS are Zero, the result is zero. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + return I; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne = RHSKnownOne & LHSKnownOne; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero = RHSKnownZero | LHSKnownZero; + break; + case Instruction::Or: + // If either the LHS or the RHS are One, the result is One. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero = RHSKnownZero & LHSKnownZero; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne = RHSKnownOne | LHSKnownOne; + break; + case Instruction::Xor: { + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); + + // If all of the demanded bits are known to be zero on one side or the + // other, turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + Instruction *Or = + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstWith(Or, *I); + } + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + // all known + if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); + } + } + + // If the RHS is a constant, see if we can simplify it. + // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // If our LHS is an 'and' and if it has one use, and if any of the bits we + // are flipping are known to be set, then the xor is just resetting those + // bits to zero. We can just knock out bits from the 'and' and the 'xor', + // simplifying both of them. + if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) + if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && + isa<ConstantInt>(I->getOperand(1)) && + isa<ConstantInt>(LHSInst->getOperand(1)) && + (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); + ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); + APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + + Constant *AndC = + ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + InsertNewInstWith(NewAnd, *I); + + Constant *XorC = + ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); + return InsertNewInstWith(NewXor, *I); + } + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero); + break; + } + case Instruction::Select: + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (ShrinkDemandedConstant(I, 1, DemandedMask) || + ShrinkDemandedConstant(I, 2, DemandedMask)) + return I; + + // Only known if known in both the LHS and RHS. + KnownOne = RHSKnownOne & LHSKnownOne; + KnownZero = RHSKnownZero & LHSKnownZero; + break; + case Instruction::Trunc: { + unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); + DemandedMask = DemandedMask.zext(truncBf); + KnownZero = KnownZero.zext(truncBf); + KnownOne = KnownOne.zext(truncBf); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return I; + DemandedMask = DemandedMask.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + break; + } + case Instruction::BitCast: + if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) + return 0; // vector->int or fp->int? + + if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { + if (VectorType *SrcVTy = + dyn_cast<VectorType>(I->getOperand(0)->getType())) { + if (DstVTy->getNumElements() != SrcVTy->getNumElements()) + // Don't touch a bitcast between vectors of different element counts. + return 0; + } else + // Don't touch a scalar-to-vector bitcast. + return 0; + } else if (I->getOperand(0)->getType()->isVectorTy()) + // Don't touch a vector-to-scalar bitcast. + return 0; + + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + break; + case Instruction::ZExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); + + DemandedMask = DemandedMask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return I; + DemandedMask = DemandedMask.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + // The top bits are known to be zero. + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt InputDemandedBits = DemandedMask & + APInt::getLowBitsSet(BitWidth, SrcBitWidth); + + APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. + if ((NewBits & DemandedMask) != 0) + InputDemandedBits.setBit(SrcBitWidth-1); + + InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1)) + return I; + InputDemandedBits = InputDemandedBits.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, or if the NewBits are not demanded + // convert this into a zero extension. + if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { + // Convert to ZExt cast + CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); + return InsertNewInstWith(NewCast, *I); + } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set + KnownOne |= NewBits; + } + break; + } + case Instruction::Add: { + // Figure out what the input bits are. If the top bits of the and result + // are not demanded, then the add doesn't demand them from its input + // either. + unsigned NLZ = DemandedMask.countLeadingZeros(); + + // If there is a constant on the RHS, there are a variety of xformations + // we can do. + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { + // If null, this should be simplified elsewhere. Some of the xforms here + // won't work if the RHS is zero. + if (RHS->isZero()) + break; + + // If the top bit of the output is demanded, demand everything from the + // input. Otherwise, we demand all the input bits except NLZ top bits. + APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); + + // Find information about known zero/one bits in the input. + if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + + // If the RHS of the add has bits set that can't affect the input, reduce + // the constant. + if (ShrinkDemandedConstant(I, 1, InDemandedBits)) + return I; + + // Avoid excess work. + if (LHSKnownZero == 0 && LHSKnownOne == 0) + break; + + // Turn it into OR if input bits are zero. + if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { + Instruction *Or = + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstWith(Or, *I); + } + + // We can say something about the output known-zero and known-one bits, + // depending on potential carries from the input constant and the + // unknowns. For example if the LHS is known to have at most the 0x0F0F0 + // bits set and the RHS constant is 0x01001, then we know we have a known + // one mask of 0x00001 and a known zero mask of 0xE0F0E. + + // To compute this, we first compute the potential carry bits. These are + // the bits which may be modified. I'm not aware of a better way to do + // this scan. + const APInt &RHSVal = RHS->getValue(); + APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); + + // Now that we know which bits have carries, compute the known-1/0 sets. + + // Bits are known one if they are known zero in one operand and one in the + // other, and there is no input carry. + KnownOne = ((LHSKnownZero & RHSVal) | + (LHSKnownOne & ~RHSVal)) & ~CarryBits; + + // Bits are known zero if they are known zero in both operands and there + // is no input carry. + KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; + } else { + // If the high-bits of this ADD are not demanded, then it does not demand + // the high bits of its LHS or RHS. + if (DemandedMask[BitWidth-1] == 0) { + // Right fill the mask of bits for this ADD to demand the most + // significant bit and all those below it. + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + } + } + break; + } + case Instruction::Sub: + // If the high-bits of this SUB are not demanded, then it does not demand + // the high bits of its LHS or RHS. + if (DemandedMask[BitWidth-1] == 0) { + // Right fill the mask of bits for this SUB to demand the most + // significant bit and all those below it. + uint32_t NLZ = DemandedMask.countLeadingZeros(); + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + } + + // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // the known zeros and ones. + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. + if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) { + APInt I0 = C0->getValue(); + if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) { + Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0); + return InsertNewInstWith(Xor, *I); + } + } + break; + case Instruction::Shl: + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + { + Value *VarX; ConstantInt *C1; + if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { + Instruction *Shr = cast<Instruction>(I->getOperand(0)); + Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, + KnownZero, KnownOne); + if (R) + return R; + } + } + + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + + // If the shift is NUW/NSW, then it does demand the high bits. + ShlOperator *IOp = cast<ShlOperator>(I); + if (IOp->hasNoSignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + else if (IOp->hasNoUnsignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + KnownZero, KnownOne, Depth+1)) + return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; + // low bits known zero. + if (ShiftAmt) + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::LShr: + // For a logical shift right + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + + // Unsigned shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<LShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + KnownZero, KnownOne, Depth+1)) + return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + if (ShiftAmt) { + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + KnownZero |= HighBits; // high bits known zero. + } + } + break; + case Instruction::AShr: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) { + // Perform the logical shift right. + Instruction *NewVal = BinaryOperator::CreateLShr( + I->getOperand(0), I->getOperand(1), I->getName()); + return InsertNewInstWith(NewVal, *I); + } + + // If the sign bit is the only bit demanded by this ashr, then there is no + // need to do it, the shift doesn't change the high bit. + if (DemandedMask.isSignBit()) + return I->getOperand(0); + + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + + // Signed shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + // If any of the "high bits" are demanded, we should set the sign bit as + // demanded. + if (DemandedMask.countLeadingZeros() <= ShiftAmt) + DemandedMaskIn.setBit(BitWidth-1); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<AShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + KnownZero, KnownOne, Depth+1)) + return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + + // Handle the sign bits. + APInt SignBit(APInt::getSignBit(BitWidth)); + // Adjust to where it is now in the mask. + SignBit = APIntOps::lshr(SignBit, ShiftAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || + (HighBits & ~DemandedMask) == HighBits) { + // Perform the logical shift right. + BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), + SA, I->getName()); + NewVal->setIsExact(cast<BinaryOperator>(I)->isExact()); + return InsertNewInstWith(NewVal, *I); + } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case Instruction::SRem: + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (Rem->isAllOnesValue()) + break; + APInt RA = Rem->getValue().abs(); + if (RA.isPowerOf2()) { + if (DemandedMask.ult(RA)) // srem won't affect demanded bits + return I->getOperand(0); + + APInt LowBits = RA - 1; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + + // The low bits of LHS are unchanged by the srem. + KnownZero = LHSKnownZero & LowBits; + KnownOne = LHSKnownOne & LowBits; + + // If LHS is non-negative or has all low bits zero, then the upper bits + // are all zero. + if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If LHS is negative and not all low bits are zero, then the upper bits + // are all one. + if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) + KnownOne |= ~LowBits; + + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + } + } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero.setBit(KnownZero.getBitWidth() - 1); + } + break; + case Instruction::URem: { + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, + KnownZero2, KnownOne2, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), AllOnes, + KnownZero2, KnownOne2, Depth+1)) + return I; + + unsigned Leaders = KnownZero2.countLeadingOnes(); + Leaders = std::max(Leaders, + KnownZero2.countLeadingOnes()); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; + break; + } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::bswap: { + // If the only bits demanded come from one byte of the bswap result, + // just shift the input byte into position to eliminate the bswap. + unsigned NLZ = DemandedMask.countLeadingZeros(); + unsigned NTZ = DemandedMask.countTrailingZeros(); + + // Round NTZ down to the next byte. If we have 11 trailing zeros, then + // we need all the bits down to bit 8. Likewise, round NLZ. If we + // have 14 leading zeros, round to 8. + NLZ &= ~7; + NTZ &= ~7; + // If we need exactly one byte, we can do this transformation. + if (BitWidth-NLZ-NTZ == 8) { + unsigned ResultBit = NTZ; + unsigned InputBit = BitWidth-NTZ-8; + + // Replace this with either a left or right shift to get the byte into + // the right place. + Instruction *NewVal; + if (InputBit > ResultBit) + NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0), + ConstantInt::get(I->getType(), InputBit-ResultBit)); + else + NewVal = BinaryOperator::CreateShl(II->getArgOperand(0), + ConstantInt::get(I->getType(), ResultBit-InputBit)); + NewVal->takeName(I); + return InsertNewInstWith(NewVal, *I); + } + + // TODO: Could compute known zero/one bits based on the input. + break; + } + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + return 0; + } + } + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + break; + } + + // If the client is only demanding bits that we know, return the known + // constant. + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, KnownOne); + return 0; +} + +/// Helper routine of SimplifyDemandedUseBits. It tries to simplify +/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into +/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign +/// of "C2-C1". +/// +/// Suppose E1 and E2 are generally different in bits S={bm, bm+1, +/// ..., bn}, without considering the specific value X is holding. +/// This transformation is legal iff one of following conditions is hold: +/// 1) All the bit in S are 0, in this case E1 == E2. +/// 2) We don't care those bits in S, per the input DemandedMask. +/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the +/// rest bits. +/// +/// Currently we only test condition 2). +/// +/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was +/// not successful. +Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, + Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) { + + const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue(); + const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue(); + if (!ShlOp1 || !ShrOp1) + return 0; // Noop. + + Value *VarX = Shr->getOperand(0); + Type *Ty = VarX->getType(); + unsigned BitWidth = Ty->getIntegerBitWidth(); + if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) + return 0; // Undef. + + unsigned ShlAmt = ShlOp1.getZExtValue(); + unsigned ShrAmt = ShrOp1.getZExtValue(); + + KnownOne.clearAllBits(); + KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); + KnownZero &= DemandedMask; + + APInt BitMask1(APInt::getAllOnesValue(BitWidth)); + APInt BitMask2(APInt::getAllOnesValue(BitWidth)); + + bool isLshr = (Shr->getOpcode() == Instruction::LShr); + BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : + (BitMask1.ashr(ShrAmt) << ShlAmt); + + if (ShrAmt <= ShlAmt) { + BitMask2 <<= (ShlAmt - ShrAmt); + } else { + BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt): + BitMask2.ashr(ShrAmt - ShlAmt); + } + + // Check if condition-2 (see the comment to this function) is satified. + if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { + if (ShrAmt == ShlAmt) + return VarX; + + if (!Shr->hasOneUse()) + return 0; + + BinaryOperator *New; + if (ShrAmt < ShlAmt) { + Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt); + New = BinaryOperator::CreateShl(VarX, Amt); + BinaryOperator *Orig = cast<BinaryOperator>(Shl); + New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); + New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); + } else { + Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt); + New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) : + BinaryOperator::CreateAShr(VarX, Amt); + if (cast<BinaryOperator>(Shr)->isExact()) + New->setIsExact(true); + } + + return InsertNewInstWith(New, *Shl); + } + + return 0; +} + +/// SimplifyDemandedVectorElts - The specified value produces a vector with +/// any number of elements. DemandedElts contains the set of elements that are +/// actually used by the caller. This method analyzes which elements of the +/// operand are undef and returns that information in UndefElts. +/// +/// If the information about demanded elements can be used to simplify the +/// operation, the operation is simplified, then the resultant value is +/// returned. This returns null if no change was made. +Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, + APInt &UndefElts, + unsigned Depth) { + unsigned VWidth = cast<VectorType>(V->getType())->getNumElements(); + APInt EltMask(APInt::getAllOnesValue(VWidth)); + assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); + + if (isa<UndefValue>(V)) { + // If the entire vector is undefined, just return this info. + UndefElts = EltMask; + return 0; + } + + if (DemandedElts == 0) { // If nothing is demanded, provide undef. + UndefElts = EltMask; + return UndefValue::get(V->getType()); + } + + UndefElts = 0; + + // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential. + if (Constant *C = dyn_cast<Constant>(V)) { + // Check if this is identity. If so, return 0 since we are not simplifying + // anything. + if (DemandedElts.isAllOnesValue()) + return 0; + + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Constant *Undef = UndefValue::get(EltTy); + + SmallVector<Constant*, 16> Elts; + for (unsigned i = 0; i != VWidth; ++i) { + if (!DemandedElts[i]) { // If not demanded, set to undef. + Elts.push_back(Undef); + UndefElts.setBit(i); + continue; + } + + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) return 0; + + if (isa<UndefValue>(Elt)) { // Already undef. + Elts.push_back(Undef); + UndefElts.setBit(i); + } else { // Otherwise, defined. + Elts.push_back(Elt); + } + } + + // If we changed the constant, return it. + Constant *NewCV = ConstantVector::get(Elts); + return NewCV != C ? NewCV : 0; + } + + // Limit search depth. + if (Depth == 10) + return 0; + + // If multiple users are using the root value, proceed with + // simplification conservatively assuming that all elements + // are needed. + if (!V->hasOneUse()) { + // Quit if we find multiple users of a non-root value though. + // They'll be handled when it's their turn to be visited by + // the main instcombine process. + if (Depth != 0) + // TODO: Just compute the UndefElts information recursively. + return 0; + + // Conservatively assume that all elements are needed. + DemandedElts = EltMask; + } + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return 0; // Only analyze instructions. + + bool MadeChange = false; + APInt UndefElts2(VWidth, 0); + Value *TmpV; + switch (I->getOpcode()) { + default: break; + + case Instruction::InsertElement: { + // If this is a variable index, we don't know which element it overwrites. + // demand exactly the same input as we produce. + ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2)); + if (Idx == 0) { + // Note that we can't propagate undef elt info, because we don't know + // which elt is getting updated. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; + } + + // If this is inserting an element that isn't demanded, remove this + // insertelement. + unsigned IdxNo = Idx->getZExtValue(); + if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { + Worklist.Add(I); + return I->getOperand(0); + } + + // Otherwise, the element inserted overwrites whatever was there, so the + // input demanded set is simpler than the output set. + APInt DemandedElts2 = DemandedElts; + DemandedElts2.clearBit(IdxNo); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + // The inserted element is defined. + UndefElts.clearBit(IdxNo); + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + uint64_t LHSVWidth = + cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements(); + APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0); + for (unsigned i = 0; i < VWidth; i++) { + if (DemandedElts[i]) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal != -1u) { + assert(MaskVal < LHSVWidth * 2 && + "shufflevector mask index out of range!"); + if (MaskVal < LHSVWidth) + LeftDemanded.setBit(MaskVal); + else + RightDemanded.setBit(MaskVal - LHSVWidth); + } + } + } + + APInt UndefElts4(LHSVWidth, 0); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, + UndefElts4, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + APInt UndefElts3(LHSVWidth, 0); + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, + UndefElts3, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + bool NewUndefElts = false; + for (unsigned i = 0; i < VWidth; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1u) { + UndefElts.setBit(i); + } else if (!DemandedElts[i]) { + NewUndefElts = true; + UndefElts.setBit(i); + } else if (MaskVal < LHSVWidth) { + if (UndefElts4[MaskVal]) { + NewUndefElts = true; + UndefElts.setBit(i); + } + } else { + if (UndefElts3[MaskVal - LHSVWidth]) { + NewUndefElts = true; + UndefElts.setBit(i); + } + } + } + + if (NewUndefElts) { + // Add additional discovered undefs. + SmallVector<Constant*, 16> Elts; + for (unsigned i = 0; i < VWidth; ++i) { + if (UndefElts[i]) + Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext()))); + else + Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()), + Shuffle->getMaskValue(i))); + } + I->setOperand(2, ConstantVector::get(Elts)); + MadeChange = true; + } + break; + } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } + case Instruction::BitCast: { + // Vector->vector casts only. + VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + if (!VTy) break; + unsigned InVWidth = VTy->getNumElements(); + APInt InputDemandedElts(InVWidth, 0); + unsigned Ratio; + + if (VWidth == InVWidth) { + // If we are converting from <4 x i32> -> <4 x f32>, we demand the same + // elements as are demanded of us. + Ratio = 1; + InputDemandedElts = DemandedElts; + } else if (VWidth > InVWidth) { + // Untested so far. + break; + + // If there are more elements in the result than there are in the source, + // then an input element is live if any of the corresponding output + // elements are live. + Ratio = VWidth/InVWidth; + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + if (DemandedElts[OutIdx]) + InputDemandedElts.setBit(OutIdx/Ratio); + } + } else { + // Untested so far. + break; + + // If there are more elements in the source than there are in the result, + // then an input element is live if the corresponding output element is + // live. + Ratio = InVWidth/VWidth; + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if (DemandedElts[InIdx/Ratio]) + InputDemandedElts.setBit(InIdx); + } + + // div/rem demand all inputs, because they don't want divide by zero. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, + UndefElts2, Depth+1); + if (TmpV) { + I->setOperand(0, TmpV); + MadeChange = true; + } + + UndefElts = UndefElts2; + if (VWidth > InVWidth) { + llvm_unreachable("Unimp"); + // If there are more elements in the result than there are in the source, + // then an output element is undef if the corresponding input element is + // undef. + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) + if (UndefElts2[OutIdx/Ratio]) + UndefElts.setBit(OutIdx); + } else if (VWidth < InVWidth) { + llvm_unreachable("Unimp"); + // If there are more elements in the source than there are in the result, + // then a result element is undef if all of the corresponding input + // elements are undef. + UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if (!UndefElts2[InIdx]) // Not undef? + UndefElts.clearBit(InIdx/Ratio); // Clear undef bit. + } + break; + } + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // div/rem demand all inputs, because they don't want divide by zero. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. Consider things + // like undef&0. The result is known zero, not undef. + UndefElts &= UndefElts2; + break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; + + case Instruction::Call: { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + if (!II) break; + switch (II->getIntrinsicID()) { + default: break; + + // Binary vector operations that work column-wise. A dest element is a + // function of the corresponding input elements from the two inputs. + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + case Intrinsic::x86_sse2_min_sd: + case Intrinsic::x86_sse2_max_sd: + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + + // If only the low elt is demanded and this is a scalarizable intrinsic, + // scalarize it now. + if (DemandedElts == 1) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + // TODO: Lower MIN/MAX/ABS/etc + Value *LHS = II->getArgOperand(0); + Value *RHS = II->getArgOperand(1); + // Extract the element as scalars. + LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); + RHS = InsertNewInstWith(ExtractElementInst::Create(RHS, + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); + + switch (II->getIntrinsicID()) { + default: llvm_unreachable("Case stmts out of sync!"); + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse2_sub_sd: + TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS, + II->getName()), *II); + break; + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse2_mul_sd: + TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS, + II->getName()), *II); + break; + } + + Instruction *New = + InsertElementInst::Create( + UndefValue::get(II->getType()), TmpV, + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false), + II->getName()); + InsertNewInstWith(New, *II); + return New; + } + } + + // Output elements are undefined if both are undefined. Consider things + // like undef&0. The result is known zero, not undef. + UndefElts &= UndefElts2; + break; + } + break; + } + } + return MadeChange ? I : 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp new file mode 100644 index 000000000000..1e724106991a --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -0,0 +1,1046 @@ +//===- InstCombineVectorOps.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements instcombine for ExtractElement, InsertElement and +// ShuffleVector. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// CheapToScalarize - Return true if the value is cheaper to scalarize than it +/// is to leave as a vector operation. isConstant indicates whether we're +/// extracting one known element. If false we're extracting a variable index. +static bool CheapToScalarize(Value *V, bool isConstant) { + if (Constant *C = dyn_cast<Constant>(V)) { + if (isConstant) return true; + + // If all elts are the same, we can extract it and use any of the values. + Constant *Op0 = C->getAggregateElement(0U); + for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i) + if (C->getAggregateElement(i) != Op0) + return false; + return true; + } + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // Insert element gets simplified to the inserted element or is deleted if + // this is constant idx extract element and its a constant idx insertelt. + if (I->getOpcode() == Instruction::InsertElement && isConstant && + isa<ConstantInt>(I->getOperand(2))) + return true; + if (I->getOpcode() == Instruction::Load && I->hasOneUse()) + return true; + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) + if (BO->hasOneUse() && + (CheapToScalarize(BO->getOperand(0), isConstant) || + CheapToScalarize(BO->getOperand(1), isConstant))) + return true; + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CI->hasOneUse() && + (CheapToScalarize(CI->getOperand(0), isConstant) || + CheapToScalarize(CI->getOperand(1), isConstant))) + return true; + + return false; +} + +/// FindScalarElement - Given a vector and an element number, see if the scalar +/// value is already around as a register, for example if it were inserted then +/// extracted from the vector. +static Value *FindScalarElement(Value *V, unsigned EltNo) { + assert(V->getType()->isVectorTy() && "Not looking at a vector?"); + VectorType *VTy = cast<VectorType>(V->getType()); + unsigned Width = VTy->getNumElements(); + if (EltNo >= Width) // Out of range access. + return UndefValue::get(VTy->getElementType()); + + if (Constant *C = dyn_cast<Constant>(V)) + return C->getAggregateElement(EltNo); + + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { + // If this is an insert to a variable element, we don't know what it is. + if (!isa<ConstantInt>(III->getOperand(2))) + return 0; + unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); + + // If this is an insert to the element we are looking for, return the + // inserted value. + if (EltNo == IIElt) + return III->getOperand(1); + + // Otherwise, the insertelement doesn't modify the value, recurse on its + // vector input. + return FindScalarElement(III->getOperand(0), EltNo); + } + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { + unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); + int InEl = SVI->getMaskValue(EltNo); + if (InEl < 0) + return UndefValue::get(VTy->getElementType()); + if (InEl < (int)LHSWidth) + return FindScalarElement(SVI->getOperand(0), InEl); + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); + } + + // Extract a value from a vector add operation with a constant zero. + Value *Val = 0; Constant *Con = 0; + if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return FindScalarElement(Val, EltNo); + } + + // Otherwise, we don't know. + return 0; +} + +// If we have a PHI node with a vector type that has only 2 uses: feed +// itself and be an operand of extractelement at a constant location, +// try to replace the PHI of the vector type with a PHI of a scalar type. +Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { + // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. + if (!PN->hasNUses(2)) + return NULL; + + // If so, it's known at this point that one operand is PHI and the other is + // an extractelement node. Find the PHI user that is not the extractelement + // node. + Value::use_iterator iu = PN->use_begin(); + Instruction *PHIUser = dyn_cast<Instruction>(*iu); + if (PHIUser == cast<Instruction>(&EI)) + PHIUser = cast<Instruction>(*(++iu)); + + // Verify that this PHI user has one use, which is the PHI itself, + // and that it is a binary operation which is cheap to scalarize. + // otherwise return NULL. + if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) || + !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) + return NULL; + + // Create a scalar PHI node that will replace the vector PHI node + // just before the current PHI node. + PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( + PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); + // Scalarize each PHI operand. + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + Value *PHIInVal = PN->getIncomingValue(i); + BasicBlock *inBB = PN->getIncomingBlock(i); + Value *Elt = EI.getIndexOperand(); + // If the operand is the PHI induction variable: + if (PHIInVal == PHIUser) { + // Scalarize the binary operation. Its first operand is the + // scalar PHI and the second operand is extracted from the other + // vector operand. + BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); + unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; + Value *Op = InsertNewInstWith( + ExtractElementInst::Create(B0->getOperand(opId), Elt, + B0->getOperand(opId)->getName() + ".Elt"), + *B0); + Value *newPHIUser = InsertNewInstWith( + BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); + scalarPHI->addIncoming(newPHIUser, inBB); + } else { + // Scalarize PHI input: + Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); + // Insert the new instruction into the predecessor basic block. + Instruction *pos = dyn_cast<Instruction>(PHIInVal); + BasicBlock::iterator InsertPos; + if (pos && !isa<PHINode>(pos)) { + InsertPos = pos; + ++InsertPos; + } else { + InsertPos = inBB->getFirstInsertionPt(); + } + + InsertNewInstWith(newEI, *InsertPos); + + scalarPHI->addIncoming(newEI, inBB); + } + } + return ReplaceInstUsesWith(EI, scalarPHI); +} + +Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { + // If vector val is constant with all elements the same, replace EI with + // that element. We handle a known element # below. + if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) + if (CheapToScalarize(C, false)) + return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); + + // If extracting a specified index from the vector, see if we can recursively + // find a previously computed scalar that was inserted into the vector. + if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { + unsigned IndexVal = IdxC->getZExtValue(); + unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); + + // If this is extracting an invalid index, turn this into undef, to avoid + // crashing the code below. + if (IndexVal >= VectorWidth) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + + // This instruction only demands the single element from the input vector. + // If the input vector has a single use, simplify it based on this use + // property. + if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { + APInt UndefElts(VectorWidth, 0); + APInt DemandedMask(VectorWidth, 0); + DemandedMask.setBit(IndexVal); + if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), + DemandedMask, UndefElts)) { + EI.setOperand(0, V); + return &EI; + } + } + + if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) + return ReplaceInstUsesWith(EI, Elt); + + // If the this extractelement is directly using a bitcast from a vector of + // the same number of elements, see if we can find the source element from + // it. In this case, we will end up needing to bitcast the scalars. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { + if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) + if (VT->getNumElements() == VectorWidth) + if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) + return new BitCastInst(Elt, EI.getType()); + } + + // If there's a vector PHI feeding a scalar use through this extractelement + // instruction, try to scalarize the PHI. + if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { + Instruction *scalarPHI = scalarizePHI(EI, PN); + if (scalarPHI) + return scalarPHI; + } + } + + if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { + // Push extractelement into predecessor operation if legal and + // profitable to do so + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + if (I->hasOneUse() && + CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { + Value *newEI0 = + Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); + Value *newEI1 = + Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); + return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); + } + } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { + // Extracting the inserted element? + if (IE->getOperand(2) == EI.getOperand(1)) + return ReplaceInstUsesWith(EI, IE->getOperand(1)); + // If the inserted and extracted elements are constants, they must not + // be the same value, extract from the pre-inserted value instead. + if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { + Worklist.AddValue(EI.getOperand(0)); + EI.setOperand(0, IE->getOperand(0)); + return &EI; + } + } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { + // If this is extracting an element from a shufflevector, figure out where + // it came from and extract from the appropriate input element instead. + if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { + int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); + Value *Src; + unsigned LHSWidth = + SVI->getOperand(0)->getType()->getVectorNumElements(); + + if (SrcIdx < 0) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + if (SrcIdx < (int)LHSWidth) + Src = SVI->getOperand(0); + else { + SrcIdx -= LHSWidth; + Src = SVI->getOperand(1); + } + Type *Int32Ty = Type::getInt32Ty(EI.getContext()); + return ExtractElementInst::Create(Src, + ConstantInt::get(Int32Ty, + SrcIdx, false)); + } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + Worklist.AddValue(EE); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } + } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + if (SI->hasOneUse()) { + // TODO: For a select on vectors, it might be useful to do this if it + // has multiple extractelement uses. For vector select, that seems to + // fight the vectorizer. + + // If we are extracting an element from a vector select or a select on + // vectors, a select on the scalars extracted from the vector arguments. + Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + + Value *Cond = SI->getCondition(); + if (Cond->getType()->isVectorTy()) { + Cond = Builder->CreateExtractElement(Cond, + EI.getIndexOperand(), + Cond->getName() + ".elt"); + } + + Value *V1Elem + = Builder->CreateExtractElement(TrueVal, + EI.getIndexOperand(), + TrueVal->getName() + ".elt"); + + Value *V2Elem + = Builder->CreateExtractElement(FalseVal, + EI.getIndexOperand(), + FalseVal->getName() + ".elt"); + return SelectInst::Create(Cond, + V1Elem, + V2Elem, + SI->getName() + ".elt"); + } + } + } + return 0; +} + +/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns +/// elements from either LHS or RHS, return the shuffle mask and true. +/// Otherwise, return false. +static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, + SmallVectorImpl<Constant*> &Mask) { + assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && + "Invalid CollectSingleShuffleElements"); + unsigned NumElts = V->getType()->getVectorNumElements(); + + if (isa<UndefValue>(V)) { + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); + return true; + } + + if (V == LHS) { + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); + return true; + } + + if (V == RHS) { + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), + i+NumElts)); + return true; + } + + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { + // If this is an insert of an extract from some other vector, include it. + Value *VecOp = IEI->getOperand(0); + Value *ScalarOp = IEI->getOperand(1); + Value *IdxOp = IEI->getOperand(2); + + if (!isa<ConstantInt>(IdxOp)) + return false; + unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); + + if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. + // Okay, we can handle this if the vector we are insertinting into is + // transitively ok. + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + // If so, update the mask to reflect the inserted undef. + Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); + return true; + } + } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ + if (isa<ConstantInt>(EI->getOperand(1)) && + EI->getOperand(0)->getType() == V->getType()) { + unsigned ExtractedIdx = + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + + // This must be extracting from either LHS or RHS. + if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { + // Okay, we can handle this if the vector we are insertinting into is + // transitively ok. + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + // If so, update the mask to reflect the inserted value. + if (EI->getOperand(0) == LHS) { + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + ExtractedIdx); + } else { + assert(EI->getOperand(0) == RHS); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + ExtractedIdx+NumElts); + } + return true; + } + } + } + } + } + // TODO: Handle shufflevector here! + + return false; +} + +/// CollectShuffleElements - We are building a shuffle of V, using RHS as the +/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask +/// that computes V and the LHS value of the shuffle. +static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, + Value *&RHS) { + assert(V->getType()->isVectorTy() && + (RHS == 0 || V->getType() == RHS->getType()) && + "Invalid shuffle!"); + unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); + + if (isa<UndefValue>(V)) { + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); + return V; + } + + if (isa<ConstantAggregateZero>(V)) { + Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); + return V; + } + + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { + // If this is an insert of an extract from some other vector, include it. + Value *VecOp = IEI->getOperand(0); + Value *ScalarOp = IEI->getOperand(1); + Value *IdxOp = IEI->getOperand(2); + + if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { + if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && + EI->getOperand(0)->getType() == V->getType()) { + unsigned ExtractedIdx = + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); + + // Either the extracted from or inserted into vector must be RHSVec, + // otherwise we'd end up with a shuffle of three inputs. + if (EI->getOperand(0) == RHS || RHS == 0) { + RHS = EI->getOperand(0); + Value *V = CollectShuffleElements(VecOp, Mask, RHS); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + NumElts+ExtractedIdx); + return V; + } + + if (VecOp == RHS) { + Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + // Update Mask to reflect that `ScalarOp' has been inserted at + // position `InsertedIdx' within the vector returned by IEI. + Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; + + // Everything but the extracted element is replaced with the RHS. + for (unsigned i = 0; i != NumElts; ++i) { + if (i != InsertedIdx) + Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), + NumElts+i); + } + return V; + } + + // If this insertelement is a chain that comes from exactly these two + // vectors, return the vector and the effective shuffle. + if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) + return EI->getOperand(0); + } + } + } + // TODO: Handle shufflevector here! + + // Otherwise, can't do anything fancy. Return an identity vector. + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); + return V; +} + +Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { + Value *VecOp = IE.getOperand(0); + Value *ScalarOp = IE.getOperand(1); + Value *IdxOp = IE.getOperand(2); + + // Inserting an undef or into an undefined place, remove this. + if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) + ReplaceInstUsesWith(IE, VecOp); + + // If the inserted element was extracted from some other vector, and if the + // indexes are constant, try to turn this into a shufflevector operation. + if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { + if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && + EI->getOperand(0)->getType() == IE.getType()) { + unsigned NumVectorElts = IE.getType()->getNumElements(); + unsigned ExtractedIdx = + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); + + if (ExtractedIdx >= NumVectorElts) // Out of range extract. + return ReplaceInstUsesWith(IE, VecOp); + + if (InsertedIdx >= NumVectorElts) // Out of range insert. + return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); + + // If we are extracting a value from a vector, then inserting it right + // back into the same place, just use the input vector. + if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) + return ReplaceInstUsesWith(IE, VecOp); + + // If this insertelement isn't used by some other insertelement, turn it + // (and any insertelements it points to), into one big shuffle. + if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { + SmallVector<Constant*, 16> Mask; + Value *RHS = 0; + Value *LHS = CollectShuffleElements(&IE, Mask, RHS); + if (RHS == 0) RHS = UndefValue::get(LHS->getType()); + // We now have a shuffle of LHS, RHS, Mask. + return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); + } + } + } + + unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (V != &IE) + return ReplaceInstUsesWith(IE, V); + return &IE; + } + + return 0; +} + +/// Return true if we can evaluate the specified expression tree if the vector +/// elements were shuffled in a different order. +static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, + unsigned Depth = 5) { + // We can always reorder the elements of a constant. + if (isa<Constant>(V)) + return true; + + // We won't reorder vector arguments. No IPO here. + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // Two users may expect different orders of the elements. Don't try it. + if (!I->hasOneUse()) + return false; + + if (Depth == 0) return false; + + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::GetElementPtr: { + for (int i = 0, e = I->getNumOperands(); i != e; ++i) { + if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1)) + return false; + } + return true; + } + case Instruction::InsertElement: { + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); + if (!CI) return false; + int ElementNumber = CI->getLimitedValue(); + + // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' + // can't put an element into multiple indices. + bool SeenOnce = false; + for (int i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] == ElementNumber) { + if (SeenOnce) + return false; + SeenOnce = true; + } + } + return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); + } + } + return false; +} + +/// Rebuild a new instruction just like 'I' but with the new operands given. +/// In the event of type mismatch, the type of the operands is correct. +static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) { + // We don't want to use the IRBuilder here because we want the replacement + // instructions to appear next to 'I', not the builder's insertion point. + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + BinaryOperator *BO = cast<BinaryOperator>(I); + assert(NewOps.size() == 2 && "binary operator with #ops != 2"); + BinaryOperator *New = + BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), + NewOps[0], NewOps[1], "", BO); + if (isa<OverflowingBinaryOperator>(BO)) { + New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); + New->setHasNoSignedWrap(BO->hasNoSignedWrap()); + } + if (isa<PossiblyExactOperator>(BO)) { + New->setIsExact(BO->isExact()); + } + return New; + } + case Instruction::ICmp: + assert(NewOps.size() == 2 && "icmp with #ops != 2"); + return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), + NewOps[0], NewOps[1]); + case Instruction::FCmp: + assert(NewOps.size() == 2 && "fcmp with #ops != 2"); + return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), + NewOps[0], NewOps[1]); + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: { + // It's possible that the mask has a different number of elements from + // the original cast. We recompute the destination type to match the mask. + Type *DestTy = + VectorType::get(I->getType()->getScalarType(), + NewOps[0]->getType()->getVectorNumElements()); + assert(NewOps.size() == 1 && "cast with #ops != 1"); + return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, + "", I); + } + case Instruction::GetElementPtr: { + Value *Ptr = NewOps[0]; + ArrayRef<Value*> Idx = NewOps.slice(1); + GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I); + GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); + return GEP; + } + } + llvm_unreachable("failed to rebuild vector instructions"); +} + +Value * +InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { + // Mask.size() does not need to be equal to the number of vector elements. + + assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); + if (isa<UndefValue>(V)) { + return UndefValue::get(VectorType::get(V->getType()->getScalarType(), + Mask.size())); + } + if (isa<ConstantAggregateZero>(V)) { + return ConstantAggregateZero::get( + VectorType::get(V->getType()->getScalarType(), + Mask.size())); + } + if (Constant *C = dyn_cast<Constant>(V)) { + SmallVector<Constant *, 16> MaskValues; + for (int i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] == -1) + MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); + else + MaskValues.push_back(Builder->getInt32(Mask[i])); + } + return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), + ConstantVector::get(MaskValues)); + } + + Instruction *I = cast<Instruction>(V); + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::Select: + case Instruction::GetElementPtr: { + SmallVector<Value*, 8> NewOps; + bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); + for (int i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); + NewOps.push_back(V); + NeedsRebuild |= (V != I->getOperand(i)); + } + if (NeedsRebuild) { + return BuildNew(I, NewOps); + } + return I; + } + case Instruction::InsertElement: { + int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); + + // The insertelement was inserting at Element. Figure out which element + // that becomes after shuffling. The answer is guaranteed to be unique + // by CanEvaluateShuffled. + bool Found = false; + int Index = 0; + for (int e = Mask.size(); Index != e; ++Index) { + if (Mask[Index] == Element) { + Found = true; + break; + } + } + + if (!Found) + return UndefValue::get( + VectorType::get(V->getType()->getScalarType(), Mask.size())); + + Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); + return InsertElementInst::Create(V, I->getOperand(1), + Builder->getInt32(Index), "", I); + } + } + llvm_unreachable("failed to reorder elements of vector instruction!"); +} + +Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { + Value *LHS = SVI.getOperand(0); + Value *RHS = SVI.getOperand(1); + SmallVector<int, 16> Mask = SVI.getShuffleMask(); + + bool MadeChange = false; + + // Undefined shuffle mask -> undefined value. + if (isa<UndefValue>(SVI.getOperand(2))) + return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); + + unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); + + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (V != &SVI) + return ReplaceInstUsesWith(SVI, V); + LHS = SVI.getOperand(0); + RHS = SVI.getOperand(1); + MadeChange = true; + } + + unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); + + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') + // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). + if (LHS == RHS || isa<UndefValue>(LHS)) { + if (isa<UndefValue>(LHS) && LHS == RHS) { + // shuffle(undef,undef,mask) -> undef. + Value *Result = (VWidth == LHSWidth) + ? LHS : UndefValue::get(SVI.getType()); + return ReplaceInstUsesWith(SVI, Result); + } + + // Remap any references to RHS to use LHS. + SmallVector<Constant*, 16> Elts; + for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { + if (Mask[i] < 0) { + Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); + continue; + } + + if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || + (Mask[i] < (int)e && isa<UndefValue>(LHS))) { + Mask[i] = -1; // Turn into undef. + Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); + } else { + Mask[i] = Mask[i] % e; // Force to LHS. + Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), + Mask[i])); + } + } + SVI.setOperand(0, SVI.getOperand(1)); + SVI.setOperand(1, UndefValue::get(RHS->getType())); + SVI.setOperand(2, ConstantVector::get(Elts)); + LHS = SVI.getOperand(0); + RHS = SVI.getOperand(1); + MadeChange = true; + } + + if (VWidth == LHSWidth) { + // Analyze the shuffle, are the LHS or RHS and identity shuffles? + bool isLHSID = true, isRHSID = true; + + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); + + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); + } + + // Eliminate identity shuffles. + if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); + } + + if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { + Value *V = EvaluateInDifferentElementOrder(LHS, Mask); + return ReplaceInstUsesWith(SVI, V); + } + + // If the LHS is a shufflevector itself, see if we can combine it with this + // one without producing an unusual shuffle. + // Cases that might be simplified: + // 1. + // x1=shuffle(v1,v2,mask1) + // x=shuffle(x1,undef,mask) + // ==> + // x=shuffle(v1,undef,newMask) + // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 + // 2. + // x1=shuffle(v1,undef,mask1) + // x=shuffle(x1,x2,mask) + // where v1.size() == mask1.size() + // ==> + // x=shuffle(v1,x2,newMask) + // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] + // 3. + // x2=shuffle(v2,undef,mask2) + // x=shuffle(x1,x2,mask) + // where v2.size() == mask2.size() + // ==> + // x=shuffle(x1,v2,newMask) + // newMask[i] = (mask[i] < x1.size()) + // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() + // 4. + // x1=shuffle(v1,undef,mask1) + // x2=shuffle(v2,undef,mask2) + // x=shuffle(x1,x2,mask) + // where v1.size() == v2.size() + // ==> + // x=shuffle(v1,v2,newMask) + // newMask[i] = (mask[i] < x1.size()) + // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() + // + // Here we are really conservative: + // we are absolutely afraid of producing a shuffle mask not in the input + // program, because the code gen may not be smart enough to turn a merged + // shuffle into two specific shuffles: it may produce worse code. As such, + // we only merge two shuffles if the result is either a splat or one of the + // input shuffle masks. In this case, merging the shuffles just removes + // one instruction, which we know is safe. This is good for things like + // turning: (splat(splat)) -> splat, or + // merge(V[0..n], V[n+1..2n]) -> V[0..2n] + ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); + ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); + if (LHSShuffle) + if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) + LHSShuffle = NULL; + if (RHSShuffle) + if (!isa<UndefValue>(RHSShuffle->getOperand(1))) + RHSShuffle = NULL; + if (!LHSShuffle && !RHSShuffle) + return MadeChange ? &SVI : 0; + + Value* LHSOp0 = NULL; + Value* LHSOp1 = NULL; + Value* RHSOp0 = NULL; + unsigned LHSOp0Width = 0; + unsigned RHSOp0Width = 0; + if (LHSShuffle) { + LHSOp0 = LHSShuffle->getOperand(0); + LHSOp1 = LHSShuffle->getOperand(1); + LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); + } + if (RHSShuffle) { + RHSOp0 = RHSShuffle->getOperand(0); + RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); + } + Value* newLHS = LHS; + Value* newRHS = RHS; + if (LHSShuffle) { + // case 1 + if (isa<UndefValue>(RHS)) { + newLHS = LHSOp0; + newRHS = LHSOp1; + } + // case 2 or 4 + else if (LHSOp0Width == LHSWidth) { + newLHS = LHSOp0; + } + } + // case 3 or 4 + if (RHSShuffle && RHSOp0Width == LHSWidth) { + newRHS = RHSOp0; + } + // case 4 + if (LHSOp0 == RHSOp0) { + newLHS = LHSOp0; + newRHS = NULL; + } + + if (newLHS == LHS && newRHS == RHS) + return MadeChange ? &SVI : 0; + + SmallVector<int, 16> LHSMask; + SmallVector<int, 16> RHSMask; + if (newLHS != LHS) + LHSMask = LHSShuffle->getShuffleMask(); + if (RHSShuffle && newRHS != RHS) + RHSMask = RHSShuffle->getShuffleMask(); + + unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; + SmallVector<int, 16> newMask; + bool isSplat = true; + int SplatElt = -1; + // Create a new mask for the new ShuffleVectorInst so that the new + // ShuffleVectorInst is equivalent to the original one. + for (unsigned i = 0; i < VWidth; ++i) { + int eltMask; + if (Mask[i] < 0) { + // This element is an undef value. + eltMask = -1; + } else if (Mask[i] < (int)LHSWidth) { + // This element is from left hand side vector operand. + // + // If LHS is going to be replaced (case 1, 2, or 4), calculate the + // new mask value for the element. + if (newLHS != LHS) { + eltMask = LHSMask[Mask[i]]; + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. + if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) + eltMask = -1; + } else + eltMask = Mask[i]; + } else { + // This element is from right hand side vector operand + // + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. (case 1) + if (isa<UndefValue>(RHS)) + eltMask = -1; + // If RHS is going to be replaced (case 3 or 4), calculate the + // new mask value for the element. + else if (newRHS != RHS) { + eltMask = RHSMask[Mask[i]-LHSWidth]; + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. + if (eltMask >= (int)RHSOp0Width) { + assert(isa<UndefValue>(RHSShuffle->getOperand(1)) + && "should have been check above"); + eltMask = -1; + } + } else + eltMask = Mask[i]-LHSWidth; + + // If LHS's width is changed, shift the mask value accordingly. + // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any + // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. + // If newRHS == newLHS, we want to remap any references from newRHS to + // newLHS so that we can properly identify splats that may occur due to + // obfuscation accross the two vectors. + if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) + eltMask += newLHSWidth; + } + + // Check if this could still be a splat. + if (eltMask >= 0) { + if (SplatElt >= 0 && SplatElt != eltMask) + isSplat = false; + SplatElt = eltMask; + } + + newMask.push_back(eltMask); + } + + // If the result mask is equal to one of the original shuffle masks, + // or is a splat, do the replacement. + if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { + SmallVector<Constant*, 16> Elts; + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + for (unsigned i = 0, e = newMask.size(); i != e; ++i) { + if (newMask[i] < 0) { + Elts.push_back(UndefValue::get(Int32Ty)); + } else { + Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); + } + } + if (newRHS == NULL) + newRHS = UndefValue::get(newLHS->getType()); + return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); + } + + return MadeChange ? &SVI : 0; +} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h new file mode 100644 index 000000000000..f84db2730b43 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -0,0 +1,105 @@ +//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef INSTCOMBINE_WORKLIST_H +#define INSTCOMBINE_WORKLIST_H + +#define DEBUG_TYPE "instcombine" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// InstCombineWorklist - This is the worklist management logic for +/// InstCombine. +class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { + SmallVector<Instruction*, 256> Worklist; + DenseMap<Instruction*, unsigned> WorklistMap; + + void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION; + InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION; +public: + InstCombineWorklist() {} + + bool isEmpty() const { return Worklist.empty(); } + + /// Add - Add the specified instruction to the worklist if it isn't already + /// in it. + void Add(Instruction *I) { + if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { + DEBUG(dbgs() << "IC: ADD: " << *I << '\n'); + Worklist.push_back(I); + } + } + + void AddValue(Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) + Add(I); + } + + /// AddInitialGroup - Add the specified batch of stuff in reverse order. + /// which should only be done when the worklist is empty and when the group + /// has no duplicates. + void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { + assert(Worklist.empty() && "Worklist must be empty to add initial group"); + Worklist.reserve(NumEntries+16); + WorklistMap.resize(NumEntries); + DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); + for (unsigned Idx = 0; NumEntries; --NumEntries) { + Instruction *I = List[NumEntries-1]; + WorklistMap.insert(std::make_pair(I, Idx++)); + Worklist.push_back(I); + } + } + + // Remove - remove I from the worklist if it exists. + void Remove(Instruction *I) { + DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I); + if (It == WorklistMap.end()) return; // Not in worklist. + + // Don't bother moving everything down, just null out the slot. + Worklist[It->second] = 0; + + WorklistMap.erase(It); + } + + Instruction *RemoveOne() { + Instruction *I = Worklist.pop_back_val(); + WorklistMap.erase(I); + return I; + } + + /// AddUsersToWorkList - When an instruction is simplified, add all users of + /// the instruction to the work lists because they might get more simplified + /// now. + /// + void AddUsersToWorkList(Instruction &I) { + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); + UI != UE; ++UI) + Add(cast<Instruction>(*UI)); + } + + + /// Zap - check that the worklist is empty and nuke the backing store for + /// the map if it is large. + void Zap() { + assert(WorklistMap.empty() && "Worklist empty, but map not?"); + + // Do an explicit clear, this shrinks the map if needed. + WorklistMap.clear(); + } +}; + +} // end namespace llvm. + +#endif diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp new file mode 100644 index 000000000000..191a101e0a30 --- /dev/null +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -0,0 +1,2534 @@ +//===- InstructionCombining.cpp - Combine multiple instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InstructionCombining - Combine instructions to form fewer, simple +// instructions. This pass does not modify the CFG. This pass is where +// algebraic simplification happens. +// +// This pass combines things like: +// %Y = add i32 %X, 1 +// %Z = add i32 %Y, 1 +// into: +// %Z = add i32 %X, 2 +// +// This is a simple worklist driven algorithm. +// +// This pass guarantees that the following canonicalizations are performed on +// the program: +// 1. If a binary operator has a constant operand, it is moved to the RHS +// 2. Bitwise operators with constant operands are always grouped so that +// shifts are performed first, then or's, then and's, then xor's. +// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible +// 4. All cmp instructions on boolean values are replaced with logical ops +// 5. add X, X is represented as (X*2) => (X << 1) +// 6. Multiplies with a power-of-two constant argument are transformed into +// shifts. +// ... etc. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instcombine" +#include "llvm/Transforms/Scalar.h" +#include "InstCombine.h" +#include "llvm-c/Initialization.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +#include <climits> +using namespace llvm; +using namespace llvm::PatternMatch; + +STATISTIC(NumCombined , "Number of insts combined"); +STATISTIC(NumConstProp, "Number of constant folds"); +STATISTIC(NumDeadInst , "Number of dead inst eliminated"); +STATISTIC(NumSunkInst , "Number of instructions sunk"); +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc , "Number of reassociations"); + +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); + +// Initialization Routines +void llvm::initializeInstCombine(PassRegistry &Registry) { + initializeInstCombinerPass(Registry); +} + +void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { + initializeInstCombine(*unwrap(R)); +} + +char InstCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine", + "Combine redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstCombiner, "instcombine", + "Combine redundant instructions", false, false) + +void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); +} + + +Value *InstCombiner::EmitGEPOffset(User *GEP) { + return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP); +} + +/// ShouldChangeType - Return true if it is desirable to convert a computation +/// from 'From' to 'To'. We don't want to convert from a legal to an illegal +/// type for example, or from a smaller to a larger illegal type. +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { + assert(From->isIntegerTy() && To->isIntegerTy()); + + // If we don't have TD, we don't know if the source/dest are legal. + if (!TD) return false; + + unsigned FromWidth = From->getPrimitiveSizeInBits(); + unsigned ToWidth = To->getPrimitiveSizeInBits(); + bool FromLegal = TD->isLegalInteger(FromWidth); + bool ToLegal = TD->isLegalInteger(ToWidth); + + // If this is a legal integer from type, and the result would be an illegal + // type, don't do the transformation. + if (FromLegal && !ToLegal) + return false; + + // Otherwise, if both are illegal, do not increase the size of the result. We + // do allow things like i160 -> i64, but not i64 -> i160. + if (!FromLegal && !ToLegal && ToWidth > FromWidth) + return false; + + return true; +} + +// Return true, if No Signed Wrap should be maintained for I. +// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", +// where both B and C should be ConstantInts, results in a constant that does +// not overflow. This function only handles the Add and Sub opcodes. For +// all other opcodes, the function conservatively returns false. +static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); + if (!OBO || !OBO->hasNoSignedWrap()) { + return false; + } + + // We reason about Add and Sub Only. + Instruction::BinaryOps Opcode = I.getOpcode(); + if (Opcode != Instruction::Add && + Opcode != Instruction::Sub) { + return false; + } + + ConstantInt *CB = dyn_cast<ConstantInt>(B); + ConstantInt *CC = dyn_cast<ConstantInt>(C); + + if (!CB || !CC) { + return false; + } + + const APInt &BVal = CB->getValue(); + const APInt &CVal = CC->getValue(); + bool Overflow = false; + + if (Opcode == Instruction::Add) { + BVal.sadd_ov(CVal, Overflow); + } else { + BVal.ssub_ov(CVal, Overflow); + } + + return !Overflow; +} + +/// Conservatively clears subclassOptionalData after a reassociation or +/// commutation. We preserve fast-math flags when applicable as they can be +/// preserved. +static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { + FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I); + if (!FPMO) { + I.clearSubclassOptionalData(); + return; + } + + FastMathFlags FMF = I.getFastMathFlags(); + I.clearSubclassOptionalData(); + I.setFastMathFlags(FMF); +} + +/// SimplifyAssociativeOrCommutative - This performs a few simplifications for +/// operators which are associative or commutative: +// +// Commutative operators: +// +// 1. Order operands such that they are listed from right (least complex) to +// left (most complex). This puts constants before unary operators before +// binary operators. +// +// Associative operators: +// +// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. +// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. +// +// Associative and commutative operators: +// +// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. +// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. +// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" +// if C1 and C2 are constants. +// +bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { + Instruction::BinaryOps Opcode = I.getOpcode(); + bool Changed = false; + + do { + // Order operands such that they are listed from right (least complex) to + // left (most complex). This puts constants before unary operators before + // binary operators. + if (I.isCommutative() && getComplexity(I.getOperand(0)) < + getComplexity(I.getOperand(1))) + Changed = !I.swapOperands(); + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); + + if (I.isAssociative()) { + // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) { + // It simplifies to V. Form "A op V". + I.setOperand(0, A); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + if (MaintainNoSignedWrap(I, B, C) && + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. + I.clearSubclassOptionalData(); + I.setHasNoSignedWrap(true); + } else { + ClearSubclassDataAfterReassociation(I); + } + + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) { + // It simplifies to V. Form "V op C". + I.setOperand(0, V); + I.setOperand(1, C); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + ClearSubclassDataAfterReassociation(I); + Changed = true; + ++NumReassoc; + continue; + } + } + } + + if (I.isAssociative() && I.isCommutative()) { + // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "V op B". + I.setOperand(0, V); + I.setOperand(1, B); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + ClearSubclassDataAfterReassociation(I); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "B op V". + I.setOperand(0, B); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + ClearSubclassDataAfterReassociation(I); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" + // if C1 and C2 are constants. + if (Op0 && Op1 && + Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && + isa<Constant>(Op0->getOperand(1)) && + isa<Constant>(Op1->getOperand(1)) && + Op0->hasOneUse() && Op1->hasOneUse()) { + Value *A = Op0->getOperand(0); + Constant *C1 = cast<Constant>(Op0->getOperand(1)); + Value *B = Op1->getOperand(0); + Constant *C2 = cast<Constant>(Op1->getOperand(1)); + + Constant *Folded = ConstantExpr::get(Opcode, C1, C2); + BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); + InsertNewInstWith(New, I); + New->takeName(Op1); + I.setOperand(0, New); + I.setOperand(1, Folded); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + ClearSubclassDataAfterReassociation(I); + + Changed = true; + continue; + } + } + + // No further simplifications. + return Changed; + } while (1); +} + +/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to +/// "(X LOp Y) ROp (X LOp Z)". +static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + switch (LOp) { + default: + return false; + + case Instruction::And: + // And distributes over Or and Xor. + switch (ROp) { + default: + return false; + case Instruction::Or: + case Instruction::Xor: + return true; + } + + case Instruction::Mul: + // Multiplication distributes over addition and subtraction. + switch (ROp) { + default: + return false; + case Instruction::Add: + case Instruction::Sub: + return true; + } + + case Instruction::Or: + // Or distributes over And. + switch (ROp) { + default: + return false; + case Instruction::And: + return true; + } + } +} + +/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to +/// "(X ROp Z) LOp (Y ROp Z)". +static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + if (Instruction::isCommutative(ROp)) + return LeftDistributesOverRight(ROp, LOp); + // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", + // but this requires knowing that the addition does not overflow and other + // such subtleties. + return false; +} + +/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations +/// which some other binary operation distributes over either by factorizing +/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this +/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is +/// a win). Returns the simplified value, or null if it didn't simplify. +Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op + + // Factorization. + if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, A, V); + V->takeName(&I); + return V; + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD); + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, V, B); + V->takeName(&I); + return V; + } + } + } + + // Expansion. + if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { + // The instruction has the form "(A op' B) op C". See if expanding it out + // to "(A op C) op' (B op C)" results in simplifications. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. + if ((L == A && R == B) || + (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) + return Op0; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + C = Builder->CreateBinOp(InnerOpcode, L, R); + C->takeName(&I); + return C; + } + } + + if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { + // The instruction has the form "A op (B op' C)". See if expanding it out + // to "(A op B) op' (A op C)" results in simplifications. + Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' + + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || + (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) + return Op1; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + A = Builder->CreateBinOp(InnerOpcode, L, R); + A->takeName(&I); + return A; + } + } + + return 0; +} + +// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction +// if the LHS is a constant zero (which is the 'negate' form). +// +Value *InstCombiner::dyn_castNegVal(Value *V) const { + if (BinaryOperator::isNeg(V)) + return BinaryOperator::getNegArgument(V); + + // Constants can be considered to be negated values if they can be folded. + if (ConstantInt *C = dyn_cast<ConstantInt>(V)) + return ConstantExpr::getNeg(C); + + if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V)) + if (C->getType()->getElementType()->isIntegerTy()) + return ConstantExpr::getNeg(C); + + return 0; +} + +// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the +// instruction if the LHS is a constant negative zero (which is the 'negate' +// form). +// +Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { + if (BinaryOperator::isFNeg(V, IgnoreZeroSign)) + return BinaryOperator::getFNegArgument(V); + + // Constants can be considered to be negated values if they can be folded. + if (ConstantFP *C = dyn_cast<ConstantFP>(V)) + return ConstantExpr::getFNeg(C); + + if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V)) + if (C->getType()->getElementType()->isFloatingPointTy()) + return ConstantExpr::getFNeg(C); + + return 0; +} + +static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, + InstCombiner *IC) { + if (CastInst *CI = dyn_cast<CastInst>(&I)) { + return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); + } + + // Figure out if the constant is the left or the right argument. + bool ConstIsRHS = isa<Constant>(I.getOperand(1)); + Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS)); + + if (Constant *SOC = dyn_cast<Constant>(SO)) { + if (ConstIsRHS) + return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); + return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); + } + + Value *Op0 = SO, *Op1 = ConstOperand; + if (!ConstIsRHS) + std::swap(Op0, Op1); + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) + return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName()+".op"); + if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + llvm_unreachable("Unknown binary instruction type!"); +} + +// FoldOpIntoSelect - Given an instruction with a select as one operand and a +// constant as the other operand, try to fold the binary operator into the +// select arguments. This also works for Cast instructions, which obviously do +// not have a second operand. +Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { + // Don't modify shared select instructions + if (!SI->hasOneUse()) return 0; + Value *TV = SI->getOperand(1); + Value *FV = SI->getOperand(2); + + if (isa<Constant>(TV) || isa<Constant>(FV)) { + // Bool selects with constant operands can be folded to logical ops. + if (SI->getType()->isIntegerTy(1)) return 0; + + // If it's a bitcast involving vectors, make sure it has the same number of + // elements on both sides. + if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) { + VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); + VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); + + // Verify that either both or neither are vectors. + if ((SrcTy == NULL) != (DestTy == NULL)) return 0; + // If vectors, verify that they have the same number of elements. + if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) + return 0; + } + + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); + Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); + + return SelectInst::Create(SI->getCondition(), + SelectTrueVal, SelectFalseVal); + } + return 0; +} + + +/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which +/// has a PHI node as operand #0, see if we can fold the instruction into the +/// PHI (which is only possible if all operands to the PHI are constants). +/// +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { + PHINode *PN = cast<PHINode>(I.getOperand(0)); + unsigned NumPHIValues = PN->getNumIncomingValues(); + if (NumPHIValues == 0) + return 0; + + // We normally only transform phis with a single use. However, if a PHI has + // multiple uses and they are all the same operation, we can fold *all* of the + // uses into the PHI. + if (!PN->hasOneUse()) { + // Walk the use list for the instruction, comparing them to I. + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User != &I && !I.isIdenticalTo(User)) + return 0; + } + // Otherwise, we can replace *all* users with the new PHI we form. + } + + // Check to see if all of the operands of the PHI are simple constants + // (constantint/constantfp/undef). If there is one non-constant value, + // remember the BB it is in. If there is more than one or if *it* is a PHI, + // bail out. We don't do arbitrary constant expressions here because moving + // their computation can be expensive without a cost model. + BasicBlock *NonConstBB = 0; + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal)) + continue; + + if (isa<PHINode>(InVal)) return 0; // Itself a phi. + if (NonConstBB) return 0; // More than one non-const value. + + NonConstBB = PN->getIncomingBlock(i); + + // If the InVal is an invoke at the end of the pred block, then we can't + // insert a computation after it without breaking the edge. + if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) + if (II->getParent() == NonConstBB) + return 0; + + // If the incoming non-constant value is in I's block, we will remove one + // instruction, but insert another equivalent one, leading to infinite + // instcombine. + if (NonConstBB == I.getParent()) + return 0; + } + + // If there is exactly one non-constant value, we can insert a copy of the + // operation in that block. However, if this is a critical edge, we would be + // inserting the computation one some other paths (e.g. inside a loop). Only + // do this if the pred block is unconditionally branching into the phi block. + if (NonConstBB != 0) { + BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); + if (!BI || !BI->isUnconditional()) return 0; + } + + // Okay, we can do the transformation: create the new PHI node. + PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); + InsertNewInstBefore(NewPN, *PN); + NewPN->takeName(PN); + + // If we are going to have to insert a new computation, do so right before the + // predecessors terminator. + if (NonConstBB) + Builder->SetInsertPoint(NonConstBB->getTerminator()); + + // Next, add all of the operands to the PHI. + if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { + // We only currently try to fold the condition of a select when it is a phi, + // not the true/false values. + Value *TrueV = SI->getTrueValue(); + Value *FalseV = SI->getFalseValue(); + BasicBlock *PhiTransBB = PN->getParent(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + BasicBlock *ThisBB = PN->getIncomingBlock(i); + Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); + Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); + Value *InV = 0; + // Beware of ConstantExpr: it may eventually evaluate to getNullValue, + // even if currently isNullValue gives false. + Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)); + if (InC && !isa<ConstantExpr>(InC)) + InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; + else + InV = Builder->CreateSelect(PN->getIncomingValue(i), + TrueVInPred, FalseVInPred, "phitmp"); + NewPN->addIncoming(InV, ThisBB); + } + } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { + Constant *C = cast<Constant>(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else if (isa<ICmpInst>(CI)) + InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + else + InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } else if (I.getNumOperands() == 2) { + Constant *C = cast<Constant>(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::get(I.getOpcode(), InC, C); + else + InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(), + PN->getIncomingValue(i), C, "phitmp"); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } else { + CastInst *CI = cast<CastInst>(&I); + Type *RetTy = CI->getType(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); + else + InV = Builder->CreateCast(CI->getOpcode(), + PN->getIncomingValue(i), I.getType(), "phitmp"); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ) { + Instruction *User = cast<Instruction>(*UI++); + if (User == &I) continue; + ReplaceInstUsesWith(*User, NewPN); + EraseInstFromFunction(*User); + } + return ReplaceInstUsesWith(I, NewPN); +} + +/// FindElementAtOffset - Given a pointer type and a constant offset, determine +/// whether or not there is a sequence of GEP indices into the pointed type that +/// will land us at the specified offset. If so, fill them into NewIndices and +/// return the resultant element type, otherwise return null. +Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl<Value*> &NewIndices) { + assert(PtrTy->isPtrOrPtrVectorTy()); + + if (!TD) + return 0; + + Type *Ty = PtrTy->getPointerElementType(); + if (!Ty->isSized()) + return 0; + + // Start with the index over the outer type. Note that the type size + // might be zero (even if the offset isn't zero) if the indexed type + // is something like [0 x {int, int}] + Type *IntPtrTy = TD->getIntPtrType(PtrTy); + int64_t FirstIdx = 0; + if (int64_t TySize = TD->getTypeAllocSize(Ty)) { + FirstIdx = Offset/TySize; + Offset -= FirstIdx*TySize; + + // Handle hosts where % returns negative instead of values [0..TySize). + if (Offset < 0) { + --FirstIdx; + Offset += TySize; + assert(Offset >= 0); + } + assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); + } + + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); + + // Index into the types. If we fail, set OrigBase to null. + while (Offset) { + // Indexing into tail padding between struct/array elements. + if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) + return 0; + + if (StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TD->getStructLayout(STy); + assert(Offset < (int64_t)SL->getSizeInBytes() && + "Offset must stay within the indexed type"); + + unsigned Elt = SL->getElementContainingOffset(Offset); + NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + Elt)); + + Offset -= SL->getElementOffset(Elt); + Ty = STy->getElementType(Elt); + } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); + assert(EltSize && "Cannot index into a zero-sized array"); + NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + Offset %= EltSize; + Ty = AT->getElementType(); + } else { + // Otherwise, we can't index into the middle of this atomic type, bail. + return 0; + } + } + + return Ty; +} + +static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. + if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && + !Src.hasOneUse()) + return false; + return true; +} + +/// Descale - Return a value X such that Val = X * Scale, or null if none. If +/// the multiplication is known not to overflow then NoSignedWrap is set. +Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { + assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!"); + assert(cast<IntegerType>(Val->getType())->getBitWidth() == + Scale.getBitWidth() && "Scale not compatible with value!"); + + // If Val is zero or Scale is one then Val = Val * Scale. + if (match(Val, m_Zero()) || Scale == 1) { + NoSignedWrap = true; + return Val; + } + + // If Scale is zero then it does not divide Val. + if (Scale.isMinValue()) + return 0; + + // Look through chains of multiplications, searching for a constant that is + // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4 + // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by + // a factor of 4 will produce X*(Y*2). The principle of operation is to bore + // down from Val: + // + // Val = M1 * X || Analysis starts here and works down + // M1 = M2 * Y || Doesn't descend into terms with more + // M2 = Z * 4 \/ than one use + // + // Then to modify a term at the bottom: + // + // Val = M1 * X + // M1 = Z * Y || Replaced M2 with Z + // + // Then to work back up correcting nsw flags. + + // Op - the term we are currently analyzing. Starts at Val then drills down. + // Replaced with its descaled value before exiting from the drill down loop. + Value *Op = Val; + + // Parent - initially null, but after drilling down notes where Op came from. + // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the + // 0'th operand of Val. + std::pair<Instruction*, unsigned> Parent; + + // RequireNoSignedWrap - Set if the transform requires a descaling at deeper + // levels that doesn't overflow. + bool RequireNoSignedWrap = false; + + // logScale - log base 2 of the scale. Negative if not a power of 2. + int32_t logScale = Scale.exactLogBase2(); + + for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // If Op is a constant divisible by Scale then descale to the quotient. + APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth. + APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder); + if (!Remainder.isMinValue()) + // Not divisible by Scale. + return 0; + // Replace with the quotient in the parent. + Op = ConstantInt::get(CI->getType(), Quotient); + NoSignedWrap = true; + break; + } + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) { + + if (BO->getOpcode() == Instruction::Mul) { + // Multiplication. + NoSignedWrap = BO->hasNoSignedWrap(); + if (RequireNoSignedWrap && !NoSignedWrap) + return 0; + + // There are three cases for multiplication: multiplication by exactly + // the scale, multiplication by a constant different to the scale, and + // multiplication by something else. + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Multiplication by a constant. + if (CI->getValue() == Scale) { + // Multiplication by exactly the scale, replace the multiplication + // by its left-hand side in the parent. + Op = LHS; + break; + } + + // Otherwise drill down into the constant. + if (!Op->hasOneUse()) + return 0; + + Parent = std::make_pair(BO, 1); + continue; + } + + // Multiplication by something else. Drill down into the left-hand side + // since that's where the reassociate pass puts the good stuff. + if (!Op->hasOneUse()) + return 0; + + Parent = std::make_pair(BO, 0); + continue; + } + + if (logScale > 0 && BO->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BO->getOperand(1))) { + // Multiplication by a power of 2. + NoSignedWrap = BO->hasNoSignedWrap(); + if (RequireNoSignedWrap && !NoSignedWrap) + return 0; + + Value *LHS = BO->getOperand(0); + int32_t Amt = cast<ConstantInt>(BO->getOperand(1))-> + getLimitedValue(Scale.getBitWidth()); + // Op = LHS << Amt. + + if (Amt == logScale) { + // Multiplication by exactly the scale, replace the multiplication + // by its left-hand side in the parent. + Op = LHS; + break; + } + if (Amt < logScale || !Op->hasOneUse()) + return 0; + + // Multiplication by more than the scale. Reduce the multiplying amount + // by the scale in the parent. + Parent = std::make_pair(BO, 1); + Op = ConstantInt::get(BO->getType(), Amt - logScale); + break; + } + } + + if (!Op->hasOneUse()) + return 0; + + if (CastInst *Cast = dyn_cast<CastInst>(Op)) { + if (Cast->getOpcode() == Instruction::SExt) { + // Op is sign-extended from a smaller type, descale in the smaller type. + unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + APInt SmallScale = Scale.trunc(SmallSize); + // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to + // descale Op as (sext Y) * Scale. In order to have + // sext (Y * SmallScale) = (sext Y) * Scale + // some conditions need to hold however: SmallScale must sign-extend to + // Scale and the multiplication Y * SmallScale should not overflow. + if (SmallScale.sext(Scale.getBitWidth()) != Scale) + // SmallScale does not sign-extend to Scale. + return 0; + assert(SmallScale.exactLogBase2() == logScale); + // Require that Y * SmallScale must not overflow. + RequireNoSignedWrap = true; + + // Drill down through the cast. + Parent = std::make_pair(Cast, 0); + Scale = SmallScale; + continue; + } + + if (Cast->getOpcode() == Instruction::Trunc) { + // Op is truncated from a larger type, descale in the larger type. + // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then + // trunc (Y * sext Scale) = (trunc Y) * Scale + // always holds. However (trunc Y) * Scale may overflow even if + // trunc (Y * sext Scale) does not, so nsw flags need to be cleared + // from this point up in the expression (see later). + if (RequireNoSignedWrap) + return 0; + + // Drill down through the cast. + unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + Parent = std::make_pair(Cast, 0); + Scale = Scale.sext(LargeSize); + if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits()) + logScale = -1; + assert(Scale.exactLogBase2() == logScale); + continue; + } + } + + // Unsupported expression, bail out. + return 0; + } + + // We know that we can successfully descale, so from here on we can safely + // modify the IR. Op holds the descaled version of the deepest term in the + // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known + // not to overflow. + + if (!Parent.first) + // The expression only had one term. + return Op; + + // Rewrite the parent using the descaled version of its operand. + assert(Parent.first->hasOneUse() && "Drilled down when more than one use!"); + assert(Op != Parent.first->getOperand(Parent.second) && + "Descaling was a no-op?"); + Parent.first->setOperand(Parent.second, Op); + Worklist.Add(Parent.first); + + // Now work back up the expression correcting nsw flags. The logic is based + // on the following observation: if X * Y is known not to overflow as a signed + // multiplication, and Y is replaced by a value Z with smaller absolute value, + // then X * Z will not overflow as a signed multiplication either. As we work + // our way up, having NoSignedWrap 'true' means that the descaled value at the + // current level has strictly smaller absolute value than the original. + Instruction *Ancestor = Parent.first; + do { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) { + // If the multiplication wasn't nsw then we can't say anything about the + // value of the descaled multiplication, and we have to clear nsw flags + // from this point on up. + bool OpNoSignedWrap = BO->hasNoSignedWrap(); + NoSignedWrap &= OpNoSignedWrap; + if (NoSignedWrap != OpNoSignedWrap) { + BO->setHasNoSignedWrap(NoSignedWrap); + Worklist.Add(Ancestor); + } + } else if (Ancestor->getOpcode() == Instruction::Trunc) { + // The fact that the descaled input to the trunc has smaller absolute + // value than the original input doesn't tell us anything useful about + // the absolute values of the truncations. + NoSignedWrap = false; + } + assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) && + "Failed to keep proper track of nsw flags while drilling down?"); + + if (Ancestor == Val) + // Got to the top, all done! + return Val; + + // Move up one level in the expression. + assert(Ancestor->hasOneUse() && "Drilled down when more than one use!"); + Ancestor = Ancestor->use_back(); + } while (1); +} + +Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { + SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); + + if (Value *V = SimplifyGEPInst(Ops, TD)) + return ReplaceInstUsesWith(GEP, V); + + Value *PtrOp = GEP.getOperand(0); + + // Eliminate unneeded casts for indices, and replace indices which displace + // by multiples of a zero size type with zero. + if (TD) { + bool MadeChange = false; + Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); + I != E; ++I, ++GTI) { + // Skip indices into struct types. + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + if (!SeqTy) continue; + + // If the element type has zero size then any index over it is equivalent + // to an index of zero, so replace it with zero if it is not zero already. + if (SeqTy->getElementType()->isSized() && + TD->getTypeAllocSize(SeqTy->getElementType()) == 0) + if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { + *I = Constant::getNullValue(IntPtrTy); + MadeChange = true; + } + + Type *IndexTy = (*I)->getType(); + if (IndexTy != IntPtrTy) { + // If we are using a wider index than needed for this platform, shrink + // it to what we need. If narrower, sign-extend it to what we need. + // This explicit cast can make subsequent optimizations more obvious. + *I = Builder->CreateIntCast(*I, IntPtrTy, true); + MadeChange = true; + } + } + if (MadeChange) return &GEP; + } + + // Combine Indices - If the source pointer to this getelementptr instruction + // is a getelementptr instruction, combine the indices of the two + // getelementptr instructions into a single instruction. + // + if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { + if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) + return 0; + + // Note that if our source is a gep chain itself then we wait for that + // chain to be resolved before we perform this transformation. This + // avoids us creating a TON of code in some cases. + if (GEPOperator *SrcGEP = + dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) + return 0; // Wait until our source is folded to completion. + + SmallVector<Value*, 8> Indices; + + // Find out whether the last index in the source GEP is a sequential idx. + bool EndsWithSequential = false; + for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); + I != E; ++I) + EndsWithSequential = !(*I)->isStructTy(); + + // Can we combine the two pointer arithmetics offsets? + if (EndsWithSequential) { + // Replace: gep (gep %P, long B), long A, ... + // With: T = long A+B; gep %P, T, ... + // + Value *Sum; + Value *SO1 = Src->getOperand(Src->getNumOperands()-1); + Value *GO1 = GEP.getOperand(1); + if (SO1 == Constant::getNullValue(SO1->getType())) { + Sum = GO1; + } else if (GO1 == Constant::getNullValue(GO1->getType())) { + Sum = SO1; + } else { + // If they aren't the same type, then the input hasn't been processed + // by the loop above yet (which canonicalizes sequential index types to + // intptr_t). Just avoid transforming this until the input has been + // normalized. + if (SO1->getType() != GO1->getType()) + return 0; + Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); + } + + // Update the GEP in place if possible. + if (Src->getNumOperands() == 2) { + GEP.setOperand(0, Src->getOperand(0)); + GEP.setOperand(1, Sum); + return &GEP; + } + Indices.append(Src->op_begin()+1, Src->op_end()-1); + Indices.push_back(Sum); + Indices.append(GEP.op_begin()+2, GEP.op_end()); + } else if (isa<Constant>(*GEP.idx_begin()) && + cast<Constant>(*GEP.idx_begin())->isNullValue() && + Src->getNumOperands() != 1) { + // Otherwise we can do the fold if the first index of the GEP is a zero + Indices.append(Src->op_begin()+1, Src->op_end()); + Indices.append(GEP.idx_begin()+1, GEP.idx_end()); + } + + if (!Indices.empty()) + return (GEP.isInBounds() && Src->isInBounds()) ? + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, + GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); + } + + // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y)) + // The GEP pattern is emitted by the SCEV expander for certain kinds of + // pointer arithmetic. + if (TD && GEP.getNumIndices() == 1 && + match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) { + unsigned AS = GEP.getPointerAddressSpace(); + if (GEP.getType() == Builder->getInt8PtrTy(AS) && + GEP.getOperand(1)->getType()->getScalarSizeInBits() == + TD->getPointerSizeInBits(AS)) { + Operator *Index = cast<Operator>(GEP.getOperand(1)); + Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); + } + } + + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). + Value *StrippedPtr = PtrOp->stripPointerCasts(); + PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType()); + + // We do not handle pointer-vector geps here. + if (!StrippedPtrTy) + return 0; + + if (StrippedPtr != PtrOp && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + + bool HasZeroPointerIndex = false; + if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) + HasZeroPointerIndex = C->isZero(); + + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... + // into : GEP [10 x i8]* X, i32 0, ... + // + // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... + // into : GEP i8* X, ... + // + // This occurs when the program declares an array extern like "int X[];" + if (HasZeroPointerIndex) { + PointerType *CPTy = cast<PointerType>(PtrOp->getType()); + if (ArrayType *CATy = + dyn_cast<ArrayType>(CPTy->getElementType())) { + // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? + if (CATy->getElementType() == StrippedPtrTy->getElementType()) { + // -> GEP i8* X, ... + SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); + GetElementPtrInst *Res = + GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); + Res->setIsInBounds(GEP.isInBounds()); + return Res; + } + + if (ArrayType *XATy = + dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ + // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? + if (CATy->getElementType() == XATy->getElementType()) { + // -> GEP [10 x i8]* X, i32 0, ... + // At this point, we know that the cast source type is a pointer + // to an array of the same type as the destination pointer + // array. Because the array type is never stepped over (there + // is a leading zero) we can fold the cast into this GEP. + GEP.setOperand(0, StrippedPtr); + return &GEP; + } + } + } + } else if (GEP.getNumOperands() == 2) { + // Transform things like: + // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V + // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast + Type *SrcElTy = StrippedPtrTy->getElementType(); + Type *ResElTy = PtrOp->getType()->getPointerElementType(); + if (TD && SrcElTy->isArrayTy() && + TD->getTypeAllocSize(SrcElTy->getArrayElementType()) == + TD->getTypeAllocSize(ResElTy)) { + Type *IdxType = TD->getIntPtrType(GEP.getType()); + Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; + Value *NewGEP = GEP.isInBounds() ? + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + // V and GEP are both pointer types --> BitCast + return new BitCastInst(NewGEP, GEP.getType()); + } + + // Transform things like: + // %V = mul i64 %N, 4 + // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V + // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast + if (TD && ResElTy->isSized() && SrcElTy->isSized()) { + // Check that changing the type amounts to dividing the index by a scale + // factor. + uint64_t ResSize = TD->getTypeAllocSize(ResElTy); + uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy); + if (ResSize && SrcSize % ResSize == 0) { + Value *Idx = GEP.getOperand(1); + unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); + uint64_t Scale = SrcSize / ResSize; + + // Earlier transforms ensure that the index has type IntPtrType, which + // considerably simplifies the logic by eliminating implicit casts. + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && + "Index not cast to pointer width?"); + + bool NSW; + if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { + // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. + // If the multiplication NewIdx * Scale may overflow then the new + // GEP may not be "inbounds". + Value *NewGEP = GEP.isInBounds() && NSW ? + Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName()); + // The NewGEP must be pointer typed, so must the old one -> BitCast + return new BitCastInst(NewGEP, GEP.getType()); + } + } + } + + // Similarly, transform things like: + // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp + // (where tmp = 8*tmp2) into: + // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast + if (TD && ResElTy->isSized() && SrcElTy->isSized() && + SrcElTy->isArrayTy()) { + // Check that changing to the array element type amounts to dividing the + // index by a scale factor. + uint64_t ResSize = TD->getTypeAllocSize(ResElTy); + uint64_t ArrayEltSize + = TD->getTypeAllocSize(SrcElTy->getArrayElementType()); + if (ResSize && ArrayEltSize % ResSize == 0) { + Value *Idx = GEP.getOperand(1); + unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); + uint64_t Scale = ArrayEltSize / ResSize; + + // Earlier transforms ensure that the index has type IntPtrType, which + // considerably simplifies the logic by eliminating implicit casts. + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && + "Index not cast to pointer width?"); + + bool NSW; + if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { + // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. + // If the multiplication NewIdx * Scale may overflow then the new + // GEP may not be "inbounds". + Value *Off[2] = { + Constant::getNullValue(TD->getIntPtrType(GEP.getType())), + NewIdx + }; + + Value *NewGEP = GEP.isInBounds() && NSW ? + Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); + // The NewGEP must be pointer typed, so must the old one -> BitCast + return new BitCastInst(NewGEP, GEP.getType()); + } + } + } + } + } + + if (!TD) + return 0; + + /// See if we can simplify: + /// X = bitcast A* to B* + /// Y = gep X, <...constant indices...> + /// into a gep of the original struct. This is important for SROA and alias + /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { + Value *Operand = BCI->getOperand(0); + PointerType *OpType = cast<PointerType>(Operand->getType()); + unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType); + APInt Offset(OffsetBits, 0); + if (!isa<BitCastInst>(Operand) && + GEP.accumulateConstantOffset(*TD, Offset) && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + + // If this GEP instruction doesn't move the pointer, just replace the GEP + // with a bitcast of the real input to the dest type. + if (!Offset) { + // If the bitcast is of an allocation, and the allocation will be + // converted to match the type of the cast, don't touch this. + if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) { + // See if the bitcast simplifies, if so, don't nuke this GEP yet. + if (Instruction *I = visitBitCast(*BCI)) { + if (I != BCI) { + I->takeName(BCI); + BCI->getParent()->getInstList().insert(BCI, I); + ReplaceInstUsesWith(*BCI, I); + } + return &GEP; + } + } + return new BitCastInst(Operand, GEP.getType()); + } + + // Otherwise, if the offset is non-zero, we need to find out if there is a + // field at Offset in 'A's type. If so, we can pull the cast through the + // GEP. + SmallVector<Value*, 8> NewIndices; + if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { + Value *NGEP = GEP.isInBounds() ? + Builder->CreateInBoundsGEP(Operand, NewIndices) : + Builder->CreateGEP(Operand, NewIndices); + + if (NGEP->getType() == GEP.getType()) + return ReplaceInstUsesWith(GEP, NGEP); + NGEP->takeName(&GEP); + return new BitCastInst(NGEP, GEP.getType()); + } + } + } + + return 0; +} + +static bool +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, + const TargetLibraryInfo *TLI) { + SmallVector<Instruction*, 4> Worklist; + Worklist.push_back(AI); + + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE; + ++UI) { + Instruction *I = cast<Instruction>(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. + return false; + + case Instruction::BitCast: + case Instruction::GetElementPtr: + Users.push_back(I); + Worklist.push_back(I); + continue; + + case Instruction::ICmp: { + ICmpInst *ICI = cast<ICmpInst>(I); + // We can fold eq/ne comparisons with null to false/true, respectively. + if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1))) + return false; + Users.push_back(I); + continue; + } + + case Instruction::Call: + // Ignore no-op and store intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::memset: { + MemIntrinsic *MI = cast<MemIntrinsic>(II); + if (MI->isVolatile() || MI->getRawDest() != PI) + return false; + } + // fall through + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + Users.push_back(I); + continue; + } + } + + if (isFreeCall(I, TLI)) { + Users.push_back(I); + continue; + } + return false; + + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + if (SI->isVolatile() || SI->getPointerOperand() != PI) + return false; + Users.push_back(I); + continue; + } + } + llvm_unreachable("missing a return?"); + } + } while (!Worklist.empty()); + return true; +} + +Instruction *InstCombiner::visitAllocSite(Instruction &MI) { + // If we have a malloc call which is only used in any amount of comparisons + // to null and free calls, delete the calls and replace the comparisons with + // true or false as appropriate. + SmallVector<WeakVH, 64> Users; + if (isAllocSiteRemovable(&MI, Users, TLI)) { + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *I = cast_or_null<Instruction>(&*Users[i]); + if (!I) continue; + + if (ICmpInst *C = dyn_cast<ICmpInst>(I)) { + ReplaceInstUsesWith(*C, + ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::objectsize) { + ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); + uint64_t DontKnow = CI->isZero() ? -1ULL : 0; + ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow)); + } + } + EraseInstFromFunction(*I); + } + + if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) { + // Replace invoke with a NOP intrinsic to maintain the original CFG + Module *M = II->getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); + InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), + None, "", II->getParent()); + } + return EraseInstFromFunction(MI); + } + return 0; +} + +/// \brief Move the call to free before a NULL test. +/// +/// Check if this free is accessed after its argument has been test +/// against NULL (property 0). +/// If yes, it is legal to move this call in its predecessor block. +/// +/// The move is performed only if the block containing the call to free +/// will be removed, i.e.: +/// 1. it has only one predecessor P, and P has two successors +/// 2. it contains the call and an unconditional branch +/// 3. its successor is the same as its predecessor's successor +/// +/// The profitability is out-of concern here and this function should +/// be called only if the caller knows this transformation would be +/// profitable (e.g., for code size). +static Instruction * +tryToMoveFreeBeforeNullTest(CallInst &FI) { + Value *Op = FI.getArgOperand(0); + BasicBlock *FreeInstrBB = FI.getParent(); + BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); + + // Validate part of constraint #1: Only one predecessor + // FIXME: We can extend the number of predecessor, but in that case, we + // would duplicate the call to free in each predecessor and it may + // not be profitable even for code size. + if (!PredBB) + return 0; + + // Validate constraint #2: Does this block contains only the call to + // free and an unconditional branch? + // FIXME: We could check if we can speculate everything in the + // predecessor block + if (FreeInstrBB->size() != 2) + return 0; + BasicBlock *SuccBB; + if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB))) + return 0; + + // Validate the rest of constraint #1 by matching on the pred branch. + TerminatorInst *TI = PredBB->getTerminator(); + BasicBlock *TrueBB, *FalseBB; + ICmpInst::Predicate Pred; + if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB))) + return 0; + if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) + return 0; + + // Validate constraint #3: Ensure the null case just falls through. + if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) + return 0; + assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && + "Broken CFG: missing edge from predecessor to successor"); + + FI.moveBefore(TI); + return &FI; +} + + +Instruction *InstCombiner::visitFree(CallInst &FI) { + Value *Op = FI.getArgOperand(0); + + // free undef -> unreachable. + if (isa<UndefValue>(Op)) { + // Insert a new store to null because we cannot modify the CFG here. + Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + return EraseInstFromFunction(FI); + } + + // If we have 'free null' delete the instruction. This can happen in stl code + // when lots of inlining happens. + if (isa<ConstantPointerNull>(Op)) + return EraseInstFromFunction(FI); + + // If we optimize for code size, try to move the call to free before the null + // test so that simplify cfg can remove the empty block and dead code + // elimination the branch. I.e., helps to turn something like: + // if (foo) free(foo); + // into + // free(foo); + if (MinimizeSize) + if (Instruction *I = tryToMoveFreeBeforeNullTest(FI)) + return I; + + return 0; +} + + + +Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { + // Change br (not X), label True, label False to: br X, label False, True + Value *X = 0; + BasicBlock *TrueDest; + BasicBlock *FalseDest; + if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && + !isa<Constant>(X)) { + // Swap Destinations and condition... + BI.setCondition(X); + BI.swapSuccessors(); + return &BI; + } + + // Cannonicalize fcmp_one -> fcmp_oeq + FCmpInst::Predicate FPred; Value *Y; + if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), + TrueDest, FalseDest)) && + BI.getCondition()->hasOneUse()) + if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || + FPred == FCmpInst::FCMP_OGE) { + FCmpInst *Cond = cast<FCmpInst>(BI.getCondition()); + Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); + + // Swap Destinations and condition. + BI.swapSuccessors(); + Worklist.Add(Cond); + return &BI; + } + + // Cannonicalize icmp_ne -> icmp_eq + ICmpInst::Predicate IPred; + if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), + TrueDest, FalseDest)) && + BI.getCondition()->hasOneUse()) + if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || + IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || + IPred == ICmpInst::ICMP_SGE) { + ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); + Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); + // Swap Destinations and condition. + BI.swapSuccessors(); + Worklist.Add(Cond); + return &BI; + } + + return 0; +} + +Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { + Value *Cond = SI.getCondition(); + if (Instruction *I = dyn_cast<Instruction>(Cond)) { + if (I->getOpcode() == Instruction::Add) + if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { + // change 'switch (X+4) case 1:' into 'switch (X) case -3' + // Skip the first item since that's the default case. + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + ConstantInt* CaseVal = i.getCaseValue(); + Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), + AddRHS); + assert(isa<ConstantInt>(NewCaseVal) && + "Result of expression should be constant"); + i.setValue(cast<ConstantInt>(NewCaseVal)); + } + SI.setCondition(I->getOperand(0)); + Worklist.Add(I); + return &SI; + } + } + return 0; +} + +Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { + Value *Agg = EV.getAggregateOperand(); + + if (!EV.hasIndices()) + return ReplaceInstUsesWith(EV, Agg); + + if (Constant *C = dyn_cast<Constant>(Agg)) { + if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) { + if (EV.getNumIndices() == 0) + return ReplaceInstUsesWith(EV, C2); + // Extract the remaining indices out of the constant indexed by the + // first index + return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); + } + return 0; // Can't handle other constants + } + + if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { + // We're extracting from an insertvalue instruction, compare the indices + const unsigned *exti, *exte, *insi, *inse; + for (exti = EV.idx_begin(), insi = IV->idx_begin(), + exte = EV.idx_end(), inse = IV->idx_end(); + exti != exte && insi != inse; + ++exti, ++insi) { + if (*insi != *exti) + // The insert and extract both reference distinctly different elements. + // This means the extract is not influenced by the insert, and we can + // replace the aggregate operand of the extract with the aggregate + // operand of the insert. i.e., replace + // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 + // %E = extractvalue { i32, { i32 } } %I, 0 + // with + // %E = extractvalue { i32, { i32 } } %A, 0 + return ExtractValueInst::Create(IV->getAggregateOperand(), + EV.getIndices()); + } + if (exti == exte && insi == inse) + // Both iterators are at the end: Index lists are identical. Replace + // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 + // %C = extractvalue { i32, { i32 } } %B, 1, 0 + // with "i32 42" + return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand()); + if (exti == exte) { + // The extract list is a prefix of the insert list. i.e. replace + // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 + // %E = extractvalue { i32, { i32 } } %I, 1 + // with + // %X = extractvalue { i32, { i32 } } %A, 1 + // %E = insertvalue { i32 } %X, i32 42, 0 + // by switching the order of the insert and extract (though the + // insertvalue should be left in, since it may have other uses). + Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), + EV.getIndices()); + return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), + makeArrayRef(insi, inse)); + } + if (insi == inse) + // The insert list is a prefix of the extract list + // We can simply remove the common indices from the extract and make it + // operate on the inserted value instead of the insertvalue result. + // i.e., replace + // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 + // %E = extractvalue { i32, { i32 } } %I, 1, 0 + // with + // %E extractvalue { i32 } { i32 42 }, 0 + return ExtractValueInst::Create(IV->getInsertedValueOperand(), + makeArrayRef(exti, exte)); + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { + // We're extracting from an intrinsic, see if we're the only user, which + // allows us to simplify multiple result intrinsics to simpler things that + // just get one value. + if (II->hasOneUse()) { + // Check if we're grabbing the overflow bit or the result of a 'with + // overflow' intrinsic. If it's the latter we can remove the intrinsic + // and replace it with a traditional binary instruction. + switch (II->getIntrinsicID()) { + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateAdd(LHS, RHS); + } + + // If the normal result of the add is dead, and the RHS is a constant, + // we can transform this into a range comparison. + // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 + if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow) + if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1))) + return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0), + ConstantExpr::getNot(CI)); + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateSub(LHS, RHS); + } + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateMul(LHS, RHS); + } + break; + default: + break; + } + } + } + if (LoadInst *L = dyn_cast<LoadInst>(Agg)) + // If the (non-volatile) load only has one use, we can rewrite this to a + // load from a GEP. This reduces the size of the load. + // FIXME: If a load is used only by extractvalue instructions then this + // could be done regardless of having multiple uses. + if (L->isSimple() && L->hasOneUse()) { + // extractvalue has integer indices, getelementptr has Value*s. Convert. + SmallVector<Value*, 4> Indices; + // Prefix an i32 0 since we need the first element. + Indices.push_back(Builder->getInt32(0)); + for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); + I != E; ++I) + Indices.push_back(Builder->getInt32(*I)); + + // We need to insert these at the location of the old load, not at that of + // the extractvalue. + Builder->SetInsertPoint(L->getParent(), L); + Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); + // Returning the load directly will cause the main loop to insert it in + // the wrong spot, so use ReplaceInstUsesWith(). + return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); + } + // We could simplify extracts from other values. Note that nested extracts may + // already be simplified implicitly by the above: extract (extract (insert) ) + // will be translated into extract ( insert ( extract ) ) first and then just + // the value inserted, if appropriate. Similarly for extracts from single-use + // loads: extract (extract (load)) will be translated to extract (load (gep)) + // and if again single-use then via load (gep (gep)) to load (gep). + // However, double extracts from e.g. function arguments or return values + // aren't handled yet. + return 0; +} + +enum Personality_Type { + Unknown_Personality, + GNU_Ada_Personality, + GNU_CXX_Personality, + GNU_ObjC_Personality +}; + +/// RecognizePersonality - See if the given exception handling personality +/// function is one that we understand. If so, return a description of it; +/// otherwise return Unknown_Personality. +static Personality_Type RecognizePersonality(Value *Pers) { + Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return Unknown_Personality; + return StringSwitch<Personality_Type>(F->getName()) + .Case("__gnat_eh_personality", GNU_Ada_Personality) + .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Case("__objc_personality_v0", GNU_ObjC_Personality) + .Default(Unknown_Personality); +} + +/// isCatchAll - Return 'true' if the given typeinfo will match anything. +static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) { + switch (Personality) { + case Unknown_Personality: + return false; + case GNU_Ada_Personality: + // While __gnat_all_others_value will match any Ada exception, it doesn't + // match foreign exceptions (or didn't, before gcc-4.7). + return false; + case GNU_CXX_Personality: + case GNU_ObjC_Personality: + return TypeInfo->isNullValue(); + } + llvm_unreachable("Unknown personality!"); +} + +static bool shorter_filter(const Value *LHS, const Value *RHS) { + return + cast<ArrayType>(LHS->getType())->getNumElements() + < + cast<ArrayType>(RHS->getType())->getNumElements(); +} + +Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { + // The logic here should be correct for any real-world personality function. + // However if that turns out not to be true, the offending logic can always + // be conditioned on the personality function, like the catch-all logic is. + Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn()); + + // Simplify the list of clauses, eg by removing repeated catch clauses + // (these are often created by inlining). + bool MakeNewInstruction = false; // If true, recreate using the following: + SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + + SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. + for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { + bool isLastClause = i + 1 == e; + if (LI.isCatch(i)) { + // A catch clause. + Value *CatchClause = LI.getClause(i); + Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + + // If we already saw this clause, there is no point in having a second + // copy of it. + if (AlreadyCaught.insert(TypeInfo)) { + // This catch clause was not already seen. + NewClauses.push_back(CatchClause); + } else { + // Repeated catch clause - drop the redundant copy. + MakeNewInstruction = true; + } + + // If this is a catch-all then there is no point in keeping any following + // clauses or marking the landingpad as having a cleanup. + if (isCatchAll(Personality, TypeInfo)) { + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + } else { + // A filter clause. If any of the filter elements were already caught + // then they can be dropped from the filter. It is tempting to try to + // exploit the filter further by saying that any typeinfo that does not + // occur in the filter can't be caught later (and thus can be dropped). + // However this would be wrong, since typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some + // class derived from it). + assert(LI.isFilter(i) && "Unsupported landingpad clause!"); + Value *FilterClause = LI.getClause(i); + ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); + unsigned NumTypeInfos = FilterType->getNumElements(); + + // An empty filter catches everything, so there is no point in keeping any + // following clauses or marking the landingpad as having a cleanup. By + // dealing with this case here the following code is made a bit simpler. + if (!NumTypeInfos) { + NewClauses.push_back(FilterClause); + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + + bool MakeNewFilter = false; // If true, make a new filter. + SmallVector<Constant *, 16> NewFilterElts; // New elements. + if (isa<ConstantAggregateZero>(FilterClause)) { + // Not an empty filter - it contains at least one null typeinfo. + assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); + Constant *TypeInfo = + Constant::getNullValue(FilterType->getElementType()); + // If this typeinfo is a catch-all then the filter can never match. + if (isCatchAll(Personality, TypeInfo)) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // There is no point in having multiple copies of this typeinfo, so + // discard all but the first copy if there is more than one. + NewFilterElts.push_back(TypeInfo); + if (NumTypeInfos > 1) + MakeNewFilter = true; + } else { + ConstantArray *Filter = cast<ConstantArray>(FilterClause); + SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. + NewFilterElts.reserve(NumTypeInfos); + + // Remove any filter elements that were already caught or that already + // occurred in the filter. While there, see if any of the elements are + // catch-alls. If so, the filter can be discarded. + bool SawCatchAll = false; + for (unsigned j = 0; j != NumTypeInfos; ++j) { + Value *Elt = Filter->getOperand(j); + Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + if (isCatchAll(Personality, TypeInfo)) { + // This element is a catch-all. Bail out, noting this fact. + SawCatchAll = true; + break; + } + if (AlreadyCaught.count(TypeInfo)) + // Already caught by an earlier clause, so having it in the filter + // is pointless. + continue; + // There is no point in having multiple copies of the same typeinfo in + // a filter, so only add it if we didn't already. + if (SeenInFilter.insert(TypeInfo)) + NewFilterElts.push_back(cast<Constant>(Elt)); + } + // A filter containing a catch-all cannot match anything by definition. + if (SawCatchAll) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // If we dropped something from the filter, make a new one. + if (NewFilterElts.size() < NumTypeInfos) + MakeNewFilter = true; + } + if (MakeNewFilter) { + FilterType = ArrayType::get(FilterType->getElementType(), + NewFilterElts.size()); + FilterClause = ConstantArray::get(FilterType, NewFilterElts); + MakeNewInstruction = true; + } + + NewClauses.push_back(FilterClause); + + // If the new filter is empty then it will catch everything so there is + // no point in keeping any following clauses or marking the landingpad + // as having a cleanup. The case of the original filter being empty was + // already handled above. + if (MakeNewFilter && !NewFilterElts.size()) { + assert(MakeNewInstruction && "New filter but not a new instruction!"); + CleanupFlag = false; + break; + } + } + } + + // If several filters occur in a row then reorder them so that the shortest + // filters come first (those with the smallest number of elements). This is + // advantageous because shorter filters are more likely to match, speeding up + // unwinding, but mostly because it increases the effectiveness of the other + // filter optimizations below. + for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + unsigned j; + // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. + for (j = i; j != e; ++j) + if (!isa<ArrayType>(NewClauses[j]->getType())) + break; + + // Check whether the filters are already sorted by length. We need to know + // if sorting them is actually going to do anything so that we only make a + // new landingpad instruction if it does. + for (unsigned k = i; k + 1 < j; ++k) + if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + // Not sorted, so sort the filters now. Doing an unstable sort would be + // correct too but reordering filters pointlessly might confuse users. + std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, + shorter_filter); + MakeNewInstruction = true; + break; + } + + // Look for the next batch of filters. + i = j + 1; + } + + // If typeinfos matched if and only if equal, then the elements of a filter L + // that occurs later than a filter F could be replaced by the intersection of + // the elements of F and L. In reality two typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some class + // derived from it) so it would be wrong to perform this transform in general. + // However the transform is correct and useful if F is a subset of L. In that + // case L can be replaced by F, and thus removed altogether since repeating a + // filter is pointless. So here we look at all pairs of filters F and L where + // L follows F in the list of clauses, and remove L if every element of F is + // an element of L. This can occur when inlining C++ functions with exception + // specifications. + for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { + // Examine each filter in turn. + Value *Filter = NewClauses[i]; + ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType()); + if (!FTy) + // Not a filter - skip it. + continue; + unsigned FElts = FTy->getNumElements(); + // Examine each filter following this one. Doing this backwards means that + // we don't have to worry about filters disappearing under us when removed. + for (unsigned j = NewClauses.size() - 1; j != i; --j) { + Value *LFilter = NewClauses[j]; + ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType()); + if (!LTy) + // Not a filter - skip it. + continue; + // If Filter is a subset of LFilter, i.e. every element of Filter is also + // an element of LFilter, then discard LFilter. + SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j; + // If Filter is empty then it is a subset of LFilter. + if (!FElts) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + // Move on to the next filter. + continue; + } + unsigned LElts = LTy->getNumElements(); + // If Filter is longer than LFilter then it cannot be a subset of it. + if (FElts > LElts) + // Move on to the next filter. + continue; + // At this point we know that LFilter has at least one element. + if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros. + // Filter is a subset of LFilter iff Filter contains only zeros (as we + // already know that Filter is not longer than LFilter). + if (isa<ConstantAggregateZero>(Filter)) { + assert(FElts <= LElts && "Should have handled this case earlier!"); + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + continue; + } + ConstantArray *LArray = cast<ConstantArray>(LFilter); + if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros. + // Since Filter is non-empty and contains only zeros, it is a subset of + // LFilter iff LFilter contains a zero. + assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); + for (unsigned l = 0; l != LElts; ++l) + if (LArray->getOperand(l)->isNullValue()) { + // LFilter contains a zero - discard it. + NewClauses.erase(J); + MakeNewInstruction = true; + break; + } + // Move on to the next filter. + continue; + } + // At this point we know that both filters are ConstantArrays. Loop over + // operands to see whether every element of Filter is also an element of + // LFilter. Since filters tend to be short this is probably faster than + // using a method that scales nicely. + ConstantArray *FArray = cast<ConstantArray>(Filter); + bool AllFound = true; + for (unsigned f = 0; f != FElts; ++f) { + Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); + AllFound = false; + for (unsigned l = 0; l != LElts; ++l) { + Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); + if (LTypeInfo == FTypeInfo) { + AllFound = true; + break; + } + } + if (!AllFound) + break; + } + if (AllFound) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + } + } + + // If we changed any of the clauses, replace the old landingpad instruction + // with a new one. + if (MakeNewInstruction) { + LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), + LI.getPersonalityFn(), + NewClauses.size()); + for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) + NLI->addClause(NewClauses[i]); + // A landing pad with no clauses must have the cleanup flag set. It is + // theoretically possible, though highly unlikely, that we eliminated all + // clauses. If so, force the cleanup flag to true. + if (NewClauses.empty()) + CleanupFlag = true; + NLI->setCleanup(CleanupFlag); + return NLI; + } + + // Even if none of the clauses changed, we may nonetheless have understood + // that the cleanup flag is pointless. Clear it if so. + if (LI.isCleanup() != CleanupFlag) { + assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); + LI.setCleanup(CleanupFlag); + return &LI; + } + + return 0; +} + + + + +/// TryToSinkInstruction - Try to move the specified instruction from its +/// current block into the beginning of DestBlock, which can only happen if it's +/// safe to move the instruction past all of the instructions between it and the +/// end of its block. +static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { + assert(I->hasOneUse() && "Invariants didn't hold!"); + + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() || + isa<TerminatorInst>(I)) + return false; + + // Do not sink alloca instructions out of the entry block. + if (isa<AllocaInst>(I) && I->getParent() == + &DestBlock->getParent()->getEntryBlock()) + return false; + + // We can only sink load instructions if there is nothing between the load and + // the end of block that could change the value. + if (I->mayReadFromMemory()) { + for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); + Scan != E; ++Scan) + if (Scan->mayWriteToMemory()) + return false; + } + + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); + I->moveBefore(InsertPos); + ++NumSunkInst; + return true; +} + + +/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding +/// all reachable code to the worklist. +/// +/// This has a couple of tricks to make the code faster and more powerful. In +/// particular, we constant fold and DCE instructions as we go, to avoid adding +/// them to the worklist (this significantly speeds up instcombine on code where +/// many instructions are dead or constant). Additionally, if we find a branch +/// whose condition is a known constant, we only visit the reachable successors. +/// +static bool AddReachableCodeToWorklist(BasicBlock *BB, + SmallPtrSet<BasicBlock*, 64> &Visited, + InstCombiner &IC, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + bool MadeIRChange = false; + SmallVector<BasicBlock*, 256> Worklist; + Worklist.push_back(BB); + + SmallVector<Instruction*, 128> InstrsForInstCombineWorklist; + DenseMap<ConstantExpr*, Constant*> FoldedConstants; + + do { + BB = Worklist.pop_back_val(); + + // We have now visited this block! If we've already been here, ignore it. + if (!Visited.insert(BB)) continue; + + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + Instruction *Inst = BBI++; + + // DCE instruction if trivially dead. + if (isInstructionTriviallyDead(Inst, TLI)) { + ++NumDeadInst; + DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); + Inst->eraseFromParent(); + continue; + } + + // ConstantProp instruction if trivially constant. + if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) + if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " + << *Inst << '\n'); + Inst->replaceAllUsesWith(C); + ++NumConstProp; + Inst->eraseFromParent(); + continue; + } + + if (TD) { + // See if we can constant fold its operands. + for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); + i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(i); + if (CE == 0) continue; + + Constant*& FoldRes = FoldedConstants[CE]; + if (!FoldRes) + FoldRes = ConstantFoldConstantExpression(CE, TD, TLI); + if (!FoldRes) + FoldRes = CE; + + if (FoldRes != CE) { + *i = FoldRes; + MadeIRChange = true; + } + } + } + + InstrsForInstCombineWorklist.push_back(Inst); + } + + // Recursively visit successors. If this is a branch or switch on a + // constant, only visit the reachable successor. + TerminatorInst *TI = BB->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) { + bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue(); + BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); + Worklist.push_back(ReachableBB); + continue; + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { + // See if this is an explicit destination. + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseValue() == Cond) { + BasicBlock *ReachableBB = i.getCaseSuccessor(); + Worklist.push_back(ReachableBB); + continue; + } + + // Otherwise it is the default destination. + Worklist.push_back(SI->getDefaultDest()); + continue; + } + } + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + Worklist.push_back(TI->getSuccessor(i)); + } while (!Worklist.empty()); + + // Once we've found all of the instructions to add to instcombine's worklist, + // add them in reverse order. This way instcombine will visit from the top + // of the function down. This jives well with the way that it adds all uses + // of instructions to the worklist after doing a transformation, thus avoiding + // some N^2 behavior in pathological cases. + IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], + InstrsForInstCombineWorklist.size()); + + return MadeIRChange; +} + +bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { + MadeIRChange = false; + + DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + << F.getName() << "\n"); + + { + // Do a depth-first traversal of the function, populate the worklist with + // the reachable instructions. Ignore blocks that are not reachable. Keep + // track of which blocks we visit. + SmallPtrSet<BasicBlock*, 64> Visited; + MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD, + TLI); + + // Do a quick scan over the function. If we find any blocks that are + // unreachable, remove any instructions inside of them. This prevents + // the instcombine code from having to deal with some bad special cases. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (Visited.count(BB)) continue; + + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; + } + if (!isa<DbgInfoIntrinsic>(Inst)) { + ++NumDeadInst; + MadeIRChange = true; + } + Inst->eraseFromParent(); + } + } + } + + while (!Worklist.isEmpty()) { + Instruction *I = Worklist.RemoveOne(); + if (I == 0) continue; // skip null values. + + // Check to see if we can DCE the instruction. + if (isInstructionTriviallyDead(I, TLI)) { + DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); + EraseInstFromFunction(*I); + ++NumDeadInst; + MadeIRChange = true; + continue; + } + + // Instruction isn't dead, see if we can constant propagate it. + if (!I->use_empty() && isa<Constant>(I->getOperand(0))) + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); + + // Add operands to the worklist. + ReplaceInstUsesWith(*I, C); + ++NumConstProp; + EraseInstFromFunction(*I); + MadeIRChange = true; + continue; + } + + // See if we can trivially sink this instruction to a successor basic block. + if (I->hasOneUse()) { + BasicBlock *BB = I->getParent(); + Instruction *UserInst = cast<Instruction>(I->use_back()); + BasicBlock *UserParent; + + // Get the block the use occurs in. + if (PHINode *PN = dyn_cast<PHINode>(UserInst)) + UserParent = PN->getIncomingBlock(I->use_begin().getUse()); + else + UserParent = UserInst->getParent(); + + if (UserParent != BB) { + bool UserIsSuccessor = false; + // See if the user is one of our successors. + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + if (*SI == UserParent) { + UserIsSuccessor = true; + break; + } + + // If the user is one of our immediate successors, and if that successor + // only has us as a predecessors (we'd have to split the critical edge + // otherwise), we can keep going. + if (UserIsSuccessor && UserParent->getSinglePredecessor()) + // Okay, the CFG is simple enough, try to sink this instruction. + MadeIRChange |= TryToSinkInstruction(I, UserParent); + } + } + + // Now that we have an instruction, try combining it to simplify it. + Builder->SetInsertPoint(I->getParent(), I); + Builder->SetCurrentDebugLocation(I->getDebugLoc()); + +#ifndef NDEBUG + std::string OrigI; +#endif + DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); + DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); + + if (Instruction *Result = visit(*I)) { + ++NumCombined; + // Should we replace the old instruction with a new one? + if (Result != I) { + DEBUG(dbgs() << "IC: Old = " << *I << '\n' + << " New = " << *Result << '\n'); + + if (!I->getDebugLoc().isUnknown()) + Result->setDebugLoc(I->getDebugLoc()); + // Everything uses the new instruction now. + I->replaceAllUsesWith(Result); + + // Move the name to the new instruction first. + Result->takeName(I); + + // Push the new instruction and any users onto the worklist. + Worklist.Add(Result); + Worklist.AddUsersToWorkList(*Result); + + // Insert the new instruction into the basic block... + BasicBlock *InstParent = I->getParent(); + BasicBlock::iterator InsertPos = I; + + // If we replace a PHI with something that isn't a PHI, fix up the + // insertion point. + if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos)) + InsertPos = InstParent->getFirstInsertionPt(); + + InstParent->getInstList().insert(InsertPos, Result); + + EraseInstFromFunction(*I); + } else { +#ifndef NDEBUG + DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' + << " New = " << *I << '\n'); +#endif + + // If the instruction was modified, it's possible that it is now dead. + // if so, remove it. + if (isInstructionTriviallyDead(I, TLI)) { + EraseInstFromFunction(*I); + } else { + Worklist.Add(I); + Worklist.AddUsersToWorkList(*I); + } + } + MadeIRChange = true; + } + } + + Worklist.Zap(); + return MadeIRChange; +} + +namespace { +class InstCombinerLibCallSimplifier : public LibCallSimplifier { + InstCombiner *IC; +public: + InstCombinerLibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + InstCombiner *IC) + : LibCallSimplifier(TD, TLI, UnsafeFPShrink) { + this->IC = IC; + } + + /// replaceAllUsesWith - override so that instruction replacement + /// can be defined in terms of the instruction combiner framework. + virtual void replaceAllUsesWith(Instruction *I, Value *With) const { + IC->ReplaceInstUsesWith(*I, With); + } +}; +} + +bool InstCombiner::runOnFunction(Function &F) { + TD = getAnalysisIfAvailable<DataLayout>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + // Minimizing size? + MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. + IRBuilder<true, TargetFolder, InstCombineIRInserter> + TheBuilder(F.getContext(), TargetFolder(TD), + InstCombineIRInserter(Worklist)); + Builder = &TheBuilder; + + InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this); + Simplifier = &TheSimplifier; + + bool EverMadeChange = false; + + // Lower dbg.declare intrinsics otherwise their value may be clobbered + // by instcombiner. + EverMadeChange = LowerDbgDeclare(F); + + // Iterate while there is work to do. + unsigned Iteration = 0; + while (DoOneIteration(F, Iteration++)) + EverMadeChange = true; + + Builder = 0; + return EverMadeChange; +} + +FunctionPass *llvm::createInstructionCombiningPass() { + return new InstCombiner(); +} |