author     Ed Schouten <ed@FreeBSD.org>    2009-06-22 08:08:12 +0000
committer  Ed Schouten <ed@FreeBSD.org>    2009-06-22 08:08:12 +0000
commit     b2f21fb044b6b5c52cff6227f9f79ca4ed42b18f (patch)
tree       86c1bc482baa6c81fc70b8d715153bfa93377186 /lib/Transforms
parent     600c6fa13de5c407dc36dbb0ab73807868741ae0 (diff)
download   src-b2f21fb044b6b5c52cff6227f9f79ca4ed42b18f.tar.gz
           src-b2f21fb044b6b5c52cff6227f9f79ca4ed42b18f.zip

Update LLVM sources to r73879. (tag: vendor/llvm/llvm-r73879)

Notes:
    svn path=/vendor/llvm/dist/; revision=194612
    svn path=/vendor/llvm/llvm-r73879/; revision=194614; tag=vendor/llvm/llvm-r73879
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                   |   1
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                    |   5
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp              |   5
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp             |   4
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp      |  12
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp            |   4
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                       |  29
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp            |  13
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp      | 394
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp             |  94
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp            |   4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp        | 151
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp      |   6
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp          |  93
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp  |  16
-rw-r--r--  lib/Transforms/Utils/Local.cpp                      |  45
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp           |   2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                |  29
18 files changed, 555 insertions, 352 deletions
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4b85e1388a68..1438b4879d2b 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMipo
LoopExtractor.cpp
LowerSetJmp.cpp
MergeFunctions.cpp
+ PartialInlining.cpp
PartialSpecialization.cpp
PruneEH.cpp
RaiseAllocations.cpp
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 9a1b29419077..cbf3a1d827a9 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
//
// NOTE: It doesn't make sense to promote non single-value types since we
// are just replacing static memory to stack memory.
+ //
+ // If the global is in different address space, don't bring it to stack.
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction && !GS.HasNonInstructionUser &&
GV->getType()->getElementType()->isSingleValueType() &&
GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage()) {
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
DOUT << "LOCALIZING GLOBAL: " << *GV;
Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
const Type* ElemTy = GV->getType()->getElementType();
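For intuition, the localization guarded above corresponds roughly to the following source-level rewrite; this is a C++ analogy of the effect, not the pass's actual mechanics, and it is only sound for ordinary address-space-0 memory that a stack slot can stand in for.

```cpp
#include <cstdio>

// Conceptually, GlobalOpt rewrites
//     static int g = 42;                // global touched only by main()
//     int main() { g++; ... }
// into the form below: the global becomes an alloca in main's entry
// block. A global in a non-zero address space cannot be replaced by
// stack memory this way, hence the new getAddressSpace() == 0 check.
int main() {
  int g = 42;  // the "localized" global now lives on main's stack
  g++;
  std::printf("%d\n", g);
  return 0;
}
```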
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index b3a2554039a1..0b975ae49979 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -20,10 +20,13 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
using namespace llvm;
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
namespace {
struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
@@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
duplicateFunction->replaceAllUsesWith(F);
duplicateFunction->eraseFromParent();
+ ++NumPartialInlined;
+
return extractedFunction;
}
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index a81bbdb3c53d..8c97b5d17c08 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) {
// Chck to see if we got the expected malloc
if (TyWeHave != Malloc1Type) {
- // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+ // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
// This handles the common declaration of: 'void *malloc(unsigned);'
const FunctionType *Malloc2Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
std::vector<const Type*>(1, Type::Int32Ty), false);
if (TyWeHave != Malloc2Type) {
// Check to see if the prototype is missing, giving us
- // sbyte*(...) * malloc
+ // i8*(...) * malloc
// This handles the common declaration of: 'void *malloc();'
const FunctionType *Malloc3Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index c6cf4dfd6ebf..b110f4eb368b 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -108,9 +108,9 @@ namespace {
class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
GlobalVariable* Counter;
Value* ResetValue;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounter();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -121,9 +121,9 @@ namespace {
GlobalVariable* Counter;
Value* ResetValue;
AllocaInst* AI;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounterOpt();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges);
// Methods of choosing when to profile
///////////////////////////////////////
-GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
uint64_t resetval) : T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
ResetValue = Init;
@@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
ReplacePhiPred(oldnext, bb, resetblock);
}
-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
uint64_t resetval)
: AI(0), T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 42978e753d16..e9bee6408fe3 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, int->uint, or
-/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 673d38b7f3ae..f4a989844478 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cstdio>
using namespace llvm;
@@ -48,7 +49,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
-cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].second.isClobber())
+ if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+ DEBUG(
+ DOUT << "GVN: non-local load ";
+ WriteAsOperand(*DOUT.stream(), LI);
+ DOUT << " is clobbered by " << *Deps[0].second.getInst();
+ );
return false;
+ }
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
@@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
+ bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
@@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
return false;
if (Blockers.count(TmpBB))
return false;
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ allSingleSucc = false;
}
assert(TmpBB);
@@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
<< UnavailablePred->getName() << "': " << *LI);
return false;
}
-
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can be 0,
+ // as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+ !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+ return false;
+
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
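In source terms, the hazard the comment above guards against: address arithmetic on a possibly-null pointer is harmless (a getelementptr never traps), but executing a load through it is not, so the load may only be moved past branching blocks when it is safe to execute unconditionally. A hedged C++ analogy, with the address computed as an integer to mirror the trap-free GEP:

```cpp
#include <cstdint>
#include <cstdio>

int readSecondField(int *p) {
  // Safe even when p == nullptr: this only computes an address,
  // just as getelementptr only does address arithmetic.
  uintptr_t addr = reinterpret_cast<uintptr_t>(p) + sizeof(int);
  if (p == nullptr)  // the "test p and branch if == 0" block
    return 0;
  // Hoisting this load above the null test would introduce a trap.
  return *reinterpret_cast<int *>(addr);
}

int main() {
  int buf[2] = {1, 2};
  std::printf("%d %d\n", readSecondField(buf), readSecondField(nullptr));
  return 0;
}
```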
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 38b11985519f..326fb38909b5 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -17,7 +17,10 @@
// which starts at zero and steps by one.
// 2. The canonical induction variable is guaranteed to be the first PHI node
// in the loop header block.
-// 3. Any pointer arithmetic recurrences are raised to use array subscripts.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
@@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// If this instruction is dead now, delete it.
RecursivelyDeleteTriviallyDeadInstructions(Inst);
- // See if this is a single-entry LCSSA PHI node. If so, we can (and
- // have to) remove
- // the PHI entirely. This is safe, because the NewVal won't be variant
+ // If we're inserting code into the exit block rather than the
+ // preheader, we can (and have to) remove the PHI entirely.
+ // This is safe, because the NewVal won't be variant
// in the loop, so we don't need an LCSSA phi node anymore.
- if (NumPreds == 1) {
+ if (ExitBlocks.size() == 1) {
PN->replaceAllUsesWith(ExitVal);
RecursivelyDeleteTriviallyDeadInstructions(PN);
break;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 5465e4a88466..5bd17e0737b2 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -390,7 +390,7 @@ namespace {
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ bool CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc, int &NumCastsRemoved);
unsigned GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign = 0);
@@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) {
}
/// AddOne - Add one to a ConstantInt
-static ConstantInt *AddOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(++Val);
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static ConstantInt *SubOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(--Val);
-}
-/// Add - Add two ConstantInts together
-static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() + C2->getValue());
-}
-/// And - Bitwise AND two ConstantInts together
-static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() & C2->getValue());
-}
-/// Subtract - Subtract one ConstantInt from another
-static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() - C2->getValue());
-}
-/// Multiply - Multiply two ConstantInts together
-static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() * C2->getValue());
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
/// MultiplyOverflows - True if the multiply can not be expressed in an int
/// this size.
@@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// SimplifyDemandedBits knows about. See if the instruction has any
/// properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth();
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
@@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
const Type *VTy = V->getType();
assert((TD || !isa<PointerType>(VTy)) &&
"SimplifyDemandedBits needs to know bit widths!");
- assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) &&
- (!isa<IntegerType>(VTy) ||
- VTy->getPrimitiveSizeInBits() == BitWidth) &&
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVector() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne \
- must have same BitWidth");
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
RHSKnownZero &= LHSKnownZero;
break;
case Instruction::Trunc: {
- unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.zext(truncBf);
RHSKnownZero.zext(truncBf);
RHSKnownOne.zext(truncBf);
@@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.trunc(SrcBitWidth);
RHSKnownZero.trunc(SrcBitWidth);
@@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
APInt InputDemandedBits = DemandedMask &
APInt::getLowBitsSet(BitWidth, SrcBitWidth);
@@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
- if (DemandedMask.ule(RA)) // srem won't affect demanded bits
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
return I->getOperand(0);
APInt LowBits = RA - 1;
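The ule → ult change above is subtle: for a power-of-two modulus RA = 2^k, srem (like C++'s truncating %) preserves the low k bits of its operand, so the operand can stand in for the instruction only when every demanded bit lies strictly below bit k. A standalone sketch with plain 32-bit ints rather than APInt:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int32_t RA = 8;  // power-of-two modulus, 2^3
  for (int32_t x = -64; x <= 64; ++x) {
    // DemandedMask u< RA (low 3 bits only): srem never changes them,
    // because x % 8 == x - 8*(x/8) and 8*(x/8) has zero low bits.
    assert(((x % RA) & 7) == (x & 7));
  }
  // DemandedMask == RA, which the old ule test accepted, is unsafe:
  // bit 3 of the remainder can differ from bit 3 of the operand.
  assert(((8 % RA) & 8) != (8 & 8));  // 8 % 8 == 0, but 8 has bit 3 set
  return 0;
}
```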
@@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// zext(i1) - 1 -> select i1, 0, -1
@@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *XorLHS = 0;
if (isa<ConstantInt>(RHSC) &&
match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
- uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
uint32_t Size = TySizeBits / 2;
@@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, Add(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
}
// X + X*C --> X * (C+1)
@@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = And(CRHS, C2);
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
if (Anded == CRHS) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
@@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// add (cast *A to intptrtype) B ->
- // cast (GEP (cast *A to sbyte*) B) --> intptrtype
+ // cast (GEP (cast *A to i8*) B) --> intptrtype
{
CastInst *CI = dyn_cast<CastInst>(LHS);
Value *Other = RHS;
@@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Other = LHS;
}
if (CI && CI->getType()->isSized() &&
- (CI->getType()->getPrimitiveSizeInBits() ==
+ (CI->getType()->getScalarSizeInBits() ==
TD->getIntPtrType()->getPrimitiveSizeInBits())
&& isa<PointerType>(CI->getOperand(0)->getType())) {
unsigned AS =
@@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(Subtract(CI1, CI2),
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2),
Op1I->getOperand(0));
}
}
@@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+ C2);
return BinaryOperator::CreateMul(Op0, CP1);
}
}
@@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::CreateMul(X, Subtract(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
return 0;
}
@@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (sdiv X, X) --> 1 (udiv X, X) --> 1
if (Op0 == Op1) {
if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
std::vector<Constant*> Elts(Ty->getNumElements(), CI);
return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
}
- ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ Constant *CI = ConstantInt::get(I.getType(), 1);
return ReplaceInstUsesWith(I, CI);
}
@@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
else
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- Multiply(RHS, LHSRHS));
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Value *X = Op->getOperand(0);
Constant *Together = 0;
if (!Op->isShift())
- Together = And(AndRHS, OpRHS);
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
case Instruction::Xor:
@@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
switch (LHSI->getOpcode()) {
default: return 0;
case Instruction::And:
- if (And(N, Mask) == Mask) {
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) ==
@@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && And(N, Mask)->isZero())
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
@@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
if (CP->isAllOnesValue()) // X & <-1,-1> -> X
return ReplaceInstUsesWith(I, I.getOperand(0));
@@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
}
}
-
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt& AndRHSMask = AndRHS->getValue();
APInt NotAndRHS(~AndRHSMask);
@@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
Val->getName()+".off");
InsertNewInstBefore(Add, I);
- AddCST = Subtract(AddOne(RHSCst), LHSCst);
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
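The AddCST rewrite just above turns two adjacent equality tests into one unsigned range check: (X == C | X == C+1) becomes ((X - C) u< 2), with the subtraction wrapping for X below C. A quick standalone verification of the identity:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // (X == 13 || X == 14)  <=>  (X - 13) u< 2  (AddCST = 14 + 1 - 13 = 2).
  for (int64_t i = -300; i <= 300; ++i) {
    uint32_t x = static_cast<uint32_t>(i);
    bool orOfEq = (x == 13u || x == 14u);
    bool range  = (x - 13u) < 2u;  // wraps to a huge value when x < 13
    assert(orOfEq == range);
  }
  return 0;
}
```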
@@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
+ if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+ if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
+ return ReplaceInstUsesWith(I, I.getOperand(1));
+ }
}
-
-
// or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
@@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
- }
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType()))
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
- Constant *CommonBits = And(Op0CI, RHS);
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
AddToWorkList(Op0I);
@@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return Changed ? &I : 0;
}
-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
-static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Add(In1, In2));
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().sgt(In1->getValue());
@@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ult(In1->getValue());
}
-/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
/// overflowed for this type.
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Subtract(In1, In2));
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().slt(In1->getValue());
@@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ugt(In1->getValue());
}
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
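The HasAddOverflow test above detects wrap purely from the already-wrapped result: for a signed add, adding a negative value must not move the result above In1, and adding a non-negative value must not move it below; for unsigned, any wrap makes the result smaller than In1. The same logic on plain 32-bit ints (a sketch, using unsigned arithmetic for well-defined wrap):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors HasAddOverflow: inspect the wrapped result against one operand.
static bool hasAddOverflow(int32_t result, int32_t in1, int32_t in2,
                           bool isSigned) {
  if (isSigned)
    return in2 < 0 ? result > in1 : result < in1;
  return static_cast<uint32_t>(result) < static_cast<uint32_t>(in1);
}

int main() {
  int32_t a = INT32_MAX, b = 1;
  int32_t wrapped = static_cast<int32_t>(
      static_cast<uint32_t>(a) + static_cast<uint32_t>(b));  // INT32_MIN
  assert(hasAddOverflow(wrapped, a, b, /*isSigned=*/true));   // signed wrap
  assert(!hasAddOverflow(wrapped, a, b, /*isSigned=*/false)); // no unsigned wrap
  return 0;
}
```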
/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
@@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// Check to see that the input is converted from an integer type that is small
// enough that preserves all bits. TODO: check here for "known" sign bits.
// This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
- unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
// If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
@@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// See if the FP constant is too large for the integer. For example,
// comparing an i8 to 300.0.
- unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
@@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty);
- else if (isa<IntegerType>(Ty))
- BitWidth = Ty->getPrimitiveSizeInBits();
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (Ty->isIntOrIntVector())
+ BitWidth = Ty->getScalarSizeInBits();
bool isSignBit = false;
@@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
// C2 (CI). By solving for X we can turn this into a range check
// instead of computing a divide.
- ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
@@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// overflow variable is set to 0 if it's corresponding bound variable is valid
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
- ConstantInt *LoBound = 0, *HiBound = 0;
+ Constant *LoBound = 0, *HiBound = 0;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
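As the [15, 20) comment says, the product computed above lets the compare be answered with a range check instead of a division: for unsigned values, X/5 == 3 holds exactly when 15 <= X < 20. A standalone spot check:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 100; ++x) {
    bool divForm   = (x / 5u == 3u);
    bool rangeForm = (x >= 15u && x < 20u);  // LoBound = 15, HiBound = 20
    assert(divForm == rangeForm);
  }
  return 0;
}
```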
@@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
if (BO->hasOneUse())
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Subtract(RHS, BOp1C));
+ ConstantExpr::getSub(RHS, BOp1C));
} else if (RHSV == 0) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
@@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (Res2 == CI) {
// Make sure that sign of the Cmp and the sign of the Cast are the same.
// For example, we might have:
- // %A = sext short %X to uint
- // %B = icmp ugt uint %A, 1330
+ // %A = sext i16 %X to i32
+ // %B = icmp ugt i32 %A, 1330
// It is incorrect to transform this into
- // %B = icmp ugt short %X, 1330
+ // %B = icmp ugt i16 %X, 1330
// because %A may have negative value.
//
// However, we allow this when the compare is EQ/NE, because they are
@@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isAllOnesValue())
return ReplaceInstUsesWith(I, CSI);
-
+
// See if we can turn a signed shr into an unsigned shr.
- if (!isa<VectorType>(I.getType())) {
- if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
- return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
- // Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0);
- if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
- return ReplaceInstUsesWith(I, Op0);
- }
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
return 0;
}
@@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
}
// See if we can fold away this shift.
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// Try to fold constant and into select arguments.
@@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
- // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr
- // of a signed value.
+ // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
+ // a signed shift.
//
if (Op1->uge(TypeBits)) {
if (I.getOpcode() != Instruction::AShr)
@@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// part of the register be zeros. Emulate this by inserting an AND to
// clear the top bits as needed. This 'and' will usually be zapped by
// other xforms later if dead.
- unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
// The mask we constructed says what the trunc would do if occurring
@@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If the allocation size is constant, form a constant mul expression
Amt = ConstantInt::get(Type::Int32Ty, Scale);
if (isa<ConstantInt>(NumElements))
- Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+ Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements),
+ cast<ConstantInt>(Amt));
// otherwise multiply the amount and the number of elements
else {
Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
@@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
/// bit computed in a larger type, which is then and'd or sext_in_reg'd to get
/// the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc,
int &NumCastsRemoved){
// We can always evaluate constants in another type.
- if (isa<ConstantInt>(V))
+ if (isa<Constant>(V))
return true;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+ const Type *OrigTy = V->getType();
// If this is an extension or truncate, we can often eliminate it.
if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t BitWidth = Ty->getBitWidth();
- if (BitWidth < OrigTy->getBitWidth() &&
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigTy->getScalarSizeInBits() &&
CI->getLimitedValue(BitWidth) < BitWidth)
return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
NumCastsRemoved);
@@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// lshr iff we know that the bits we would otherwise be shifting in are
// already zeros.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t OrigBitWidth = OrigTy->getBitWidth();
- uint32_t BitWidth = Ty->getBitWidth();
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
if (BitWidth < OrigBitWidth &&
MaskedValueIsZero(I->getOperand(0),
APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
@@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
const Type *SrcTy = Src->getType();
const Type *DestTy = CI.getType();
- uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
// See if we can simplify any instructions used by the LHS whose sole
// purpose is to compute bits we don't care about.
@@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) &&
- CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
+ (isSafeIntegerType(DestTy->getScalarType()) ||
+ !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluting in a different type always
// eliminates the cast, so it is always a win. If this is a zero-extension,
@@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Src = CI.getOperand(0);
const Type *Ty = CI.getType();
- uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
- uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+ uint32_t DestBitWidth = Ty->getScalarSizeInBits();
+ uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
- if (DestBitWidth == 1) {
+ if (DestBitWidth == 1 &&
+ isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
Constant *One = ConstantInt::get(Src->getType(), 1);
Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Optimize trunc(lshr(), c) to pull the shift through the truncate.
ConstantInt *ShAmtV = 0;
Value *ShiftOp = 0;
@@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
In->getName()+".lobit"),
CI);
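The ".lobit" shift built here turns a sign-bit test into data: logically shifting right by width-1 moves the sign bit into bit 0, so zext(icmp slt X, 0) becomes a single lshr. Illustrated with 32-bit values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t i = -1000; i <= 1000; ++i) {
    int32_t x = static_cast<int32_t>(i);
    // lshr x, 31: the sign bit lands in bit 0, yielding 0 or 1.
    uint32_t lobit = static_cast<uint32_t>(x) >> 31;
    assert(lobit == (x < 0 ? 1u : 0u));
  }
  return 0;
}
```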
@@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
- unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
- unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If we're actually extending zero bits, then if
// SrcSize < DstSize: zext(a & mask)
// SrcSize == DstSize: a & mask
// SrcSize > DstSize: trunc(a) & mask
if (SrcSize < DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(AndValue);
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
Instruction *And =
BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
InsertNewInstBefore(And, CI);
return new ZExtInst(And, CI.getType());
} else if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
} else if (SrcSize > DstSize) {
Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
InsertNewInstBefore(Trunc, CI);
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(),
+ AndValue));
}
}
@@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
+ InsertNewInstBefore(NewAnd, *And);
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
return 0;
}
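The new zext((trunc t) & C) → t & zext(C) fold added in this hunk is a pure bit identity: masking in the narrow type and widening with zeros gives the same bits as masking the wide value with the zero-extended mask. A quick check with u64/u32 standing in for the IR types:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0x00FF00FFu;
  const uint64_t samples[] = {0x0123456789ABCDEFull,
                              0xFFFFFFFFFFFFFFFFull, 42ull};
  for (uint64_t t : samples) {
    uint64_t before = static_cast<uint64_t>(
        static_cast<uint32_t>(t) & C);              // zext(trunc(t) & C)
    uint64_t after  = t & static_cast<uint64_t>(C); // t & zext(C)
    assert(before == after);
  }
  return 0;
}
```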
@@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// eliminate the trunc/sext pair.
if (getOpcode(Src) == Instruction::Trunc) {
Value *Op = cast<User>(Src)->getOperand(0);
- unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
- unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
- unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned OpBits = Op->getType()->getScalarSizeInBits();
+ unsigned MidBits = Src->getType()->getScalarSizeInBits();
+ unsigned DestBits = CI.getType()->getScalarSizeInBits();
unsigned NumSignBits = ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
BA == CA && isa<TruncInst>(A)) {
Value *I = cast<TruncInst>(A)->getOperand(0);
if (I->getType() == CI.getType()) {
- unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
- unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
@@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
// the cast, do this xform.
- if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
CI.getType(), CI);
RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
@@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
// 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() <=
+ (int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// trunc to be exposed to other transforms. Don't do this for extending
// ptrtoint's, because we don't know if the target sign or zero extends its
// pointers.
- if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
TD->getIntPtrType(),
"tmp"), CI);
@@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// allows the trunc to be exposed to other transforms. Don't do this for
// extending inttoptr's, because we don't know if the target sign or zero
// extends to pointers.
- if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() >
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
TD->getIntPtrType(),
@@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
(Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
In->getName()+".lobit"),
*ICI);
@@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// The comparison constant and the result are not neccessarily the
// same width. Make an all-ones value by inserting a AShr.
Value *X = IC->getOperand(0);
- uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+ uint32_t Bits = X->getType()->getScalarSizeInBits();
Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
ShAmt, "ones");
@@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
Instruction *InsertPoint,
InstCombiner *IC) {
- unsigned PtrSize = DTy->getPrimitiveSizeInBits();
- unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+ unsigned PtrSize = DTy->getScalarSizeInBits();
+ unsigned VTySize = V->getType()->getScalarSizeInBits();
// We must cast correctly to the pointer type. Ensure that we
// sign extend the integer value if it is smaller as this is
// used for address computation.
@@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
const Type *SrcTy = CI->getOperand(0)->getType();
// We can eliminate a cast from i32 to i64 iff the target
// is a 32-bit pointer target.
- if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+ if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
*i = CI->getOperand(0);
}
@@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
ConstantInt *Scale = 0;
if (ArrayEltSize == 1) {
NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(NewIdx->getType(), 1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
NewIdx = ConstantInt::get(CI->getType(), 1);
Scale = CI;
@@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
isa<ConstantInt>(Inst->getOperand(1))) {
ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
NewIdx = Inst->getOperand(0);
} else if (Inst->getOpcode() == Instruction::Mul &&
isa<ConstantInt>(Inst->getOperand(1))) {
@@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
return 0;
}
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
-static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
- // If it is an alloca it is always safe to load from.
- if (isa<AllocaInst>(V)) return true;
-
- // If it is a global variable it is mostly safe to load from.
- if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
- // Don't try to evaluate aliases. External weak GV can be null.
- return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
-
- // Otherwise, be a little bit agressive by scanning the local block where we
- // want to check to see if the pointer is already being loaded or stored
- // from/to. If so, the previous load or store would have already trapped,
- // so there is no harm doing an extra load (also, CSE will later eliminate
- // the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
-
- while (BBI != E) {
- --BBI;
-
- // If we see a free or a call (which might do a free) the pointer could be
- // marked invalid.
- if (isa<FreeInst>(BBI) ||
- (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)))
- return false;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->getOperand(1) == V) return true;
- }
-
- }
- return false;
-}
-
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c0ca2df1ce11..5a70fc3bc6f7 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -76,7 +76,7 @@ namespace {
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
unsigned JumpThreadCost);
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// This is important for things like "phi i1 [true, true, false, true, x]"
/// where we only need to clone the block for the true blocks once.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
SmallVector<BasicBlock*, 16> CommonPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == CstVal)
+ if (PN->getIncomingValue(i) == Val)
CommonPreds.push_back(PN->getIncomingBlock(i));
if (CommonPreds.size() == 1)
@@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
- // If there is only a single predecessor of this block, nothing to fold.
- if (BB->getSinglePredecessor())
- return false;
-
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0)
return false;
@@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getOpcode() == Instruction::And))
return true;
- // If we have "br (phi != 42)" and the phi node has any constant values as
- // operands, we can thread through this block.
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
- if (isa<PHINode>(CondCmp->getOperand(0)) &&
- isa<Constant>(CondCmp->getOperand(1)) &&
- ProcessBranchOnCompare(CondCmp, BB))
- return true;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ if (isa<PHINode>(CondCmp->getOperand(0))) {
+ // If we have "br (phi != 42)" and the phi node has any constant values
+ // as operands, we can thread through this block.
+ //
+ // If we have "br (cmp phi, x)" and the phi node contains x such that the
+ // comparison uniquely identifies the branch target, we can thread
+ // through this block.
+
+ if (ProcessBranchOnCompare(CondCmp, BB))
+ return true;
+ }
+
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with the same value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
+ }
+ }
+ }
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
@@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
}
+/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
+/// hand sides of the compare instruction, try to determine the result. If the
+/// result can not be determined, a null pointer is returned.
+static Constant *GetResultOfComparison(CmpInst::Predicate pred,
+ Value *LHS, Value *RHS) {
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::getCompare(pred, CLHS, CRHS);
+
+ if (LHS == RHS)
+ if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
+ return ICmpInst::isTrueWhenEqual(pred) ?
+ ConstantInt::getTrue() : ConstantInt::getFalse();
+
+ return 0;
+}
+
/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a constant. If the PHI node contains any constants as inputs, we
-/// can fold the compare for that edge and thread through it.
+/// node and a value. If we can identify when the comparison is true between
+/// the phi inputs and the value, we can fold the compare for that edge and
+/// thread through it.
bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+ Value *RHS = Cmp->getOperand(1);
// If the phi isn't in the current block, an incoming edge to this block
// doesn't control the destination.
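GetResultOfComparison above folds a compare of a value against itself only for integer and pointer types. The restriction matters because the identity fails for floating point: a NaN compares unequal to everything, itself included.

```cpp
#include <cassert>
#include <cmath>

int main() {
  double x = std::nan("");
  assert(!(x == x));  // the fcmp analogue of "LHS == RHS" is false for NaN
  assert(x != x);     // so only int/pointer self-compares can be folded
  return 0;
}
```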
@@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- Constant *PredCst = 0;
+ Value *PredVal = 0;
bool TrueDirection = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (PredCst == 0) continue;
+ PredVal = PN->getIncomingValue(i);
+
+ Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS);
+ if (!Res) {
+ PredVal = 0;
+ continue;
+ }
- Constant *Res;
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
- Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
- else
- Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
- PredCst, RHS);
// If this folded to a constant expr, we can't do anything.
if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
TrueDirection = ResC->getZExtValue();
@@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
}
// Otherwise, we can't fold this input.
- PredCst = 0;
+ PredVal = 0;
}
// If no match, bail out.
- if (PredCst == 0)
+ if (PredVal == 0)
return false;
// See if the cost of duplicating this block is low enough.
@@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// If so, we can actually do this threading. Merge any common predecessors
// that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
// Next, get our successor.
BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 9c785968e1d4..6f7a7f866a8e 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {
// Return V+1
static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
}
// Return V-1
static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 944f40931910..7579748bbc0a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -143,10 +143,10 @@ namespace {
 /// inside the loop then try to eliminate the cast operation.
void OptimizeShadowIV(Loop *L);
- /// OptimizeSMax - Rewrite the loop's terminating condition
- /// if it uses an smax computation.
- ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse);
+ /// OptimizeMax - Rewrite the loop's terminating condition
+ /// if it uses a max computation.
+ ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
@@ -336,13 +336,6 @@ namespace {
/// EmittedBase.
Value *OperandValToReplace;
- /// isSigned - The stride (and thus also the Base) of this use may be in
- /// a narrower type than the use itself (OperandValToReplace->getType()).
- /// When this is the case, the isSigned field indicates whether the
- /// IV expression should be signed-extended instead of zero-extended to
- /// fit the type of the use.
- bool isSigned;
-
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
@@ -363,7 +356,6 @@ namespace {
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- isSigned(IVSU.isSigned()),
Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
@@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
}
- if (isSigned)
- NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
- else
- NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
-
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
@@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
if (Val->isLoopInvariant(L)) return; // Nothing to do.
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
@@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
SCEVHandle Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
@@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
@@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
if (Start != SARE->getStart()) {
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
}
@@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
+static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs,
SCEVHandle Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
@@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
@@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
- std::vector<SCEVHandle> UniqueSubExprs;
+ SmallVector<SCEVHandle, 16> UniqueSubExprs;
- std::vector<SCEVHandle> SubExprs;
+ SmallVector<SCEVHandle, 16> SubExprs;
unsigned NumUsesInsideLoop = 0;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
@@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) {
return SC->getValue()->getValue().isNegative();
}
-// CollectIVUsers - Transform our list of users and offsets to a bit more
-// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-// of the strided accesses, as well as the old information from Uses. We
-// progressively move information from the Base field to the Imm field, until
-// we eventually have the full access expression to rewrite the use.
+/// CollectIVUsers - Transform our list of users and offsets to a bit more
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
+/// progressively move information from the Base field to the Imm field, until
+/// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
@@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
- ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+ Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
- SE->getConstant(ConstantInt::get(CmpTy, Scale)))
- : SE->getConstant(ConstantInt::get(NewCmpIntTy,
+ SE->getConstant(CmpTy, Scale))
+ : SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
- ->getSExtValue()*Scale));
+ ->getSExtValue()*Scale);
break;
}
}
@@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
+ IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
@@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
}
-/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
-/// an smax computation.
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// p[i] = 0.0;
/// } while (++i < n);
///
-/// where the comparison is signed, the trip count isn't just 'n', because
-/// 'n' could be negative. And unfortunately this can come up even for loops
-/// where the user didn't use a C do-while loop. For example, seemingly
-/// well-behaved top-test loops will commonly be lowered like this:
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
///
/// if (n > 0) {
/// i = 0;
@@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
-/// signed-max expression, which allows it to give the loop a canonical
+/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
-/// smax = n < 1 ? 1 : n;
+/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
-/// } while (++i != smax);
+/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
@@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
-ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse) {
+ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
@@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
- const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
- if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+ if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ return Cond;
+ const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
+ if (Max != SE->getSCEV(Sel)) return Cond;
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
- SCEVHandle SMaxLHS = SMax->getOperand(0);
- SCEVHandle SMaxRHS = SMax->getOperand(1);
- if (!SMaxLHS || SMaxLHS != One) return Cond;
+ SCEVHandle MaxLHS = Max->getOperand(0);
+ SCEVHandle MaxRHS = Max->getOperand(1);
+ if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
@@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
- else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ CmpInst::Predicate Pred =
+ isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
 // Ok, everything looks ok to change the condition into an SLT/ULT or
 // SGE/UGE and delete the max calculation.
ICmpInst *NewCond =
- new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
- CmpInst::ICMP_SLT :
- CmpInst::ICMP_SGE,
- Cond->getOperand(0), NewRHS, "scmp", Cond);
+ new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
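A quick worked mapping of the predicate selection above (a hedged summary, not additional code in the patch):

    // do { ... } while (++i != smax(1, n))  -->  while (++i slt n)
    // do { ... } while (++i != umax(1, n))  -->  while (++i ult n)
    // If the exit test used ICMP_EQ rather than ICMP_NE, the chosen
    // predicate is inverted: SLT becomes SGE, ULT becomes UGE.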
@@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
- ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
@@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
}
}
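At the source level, the shadow-IV rewrite corresponds roughly to the following (hypothetical C; the pass works on the IR form of the loop):

    // Before: the integer IV is converted to floating point on every trip.
    //   for (int i = 0; i < n; ++i)
    //     use((double)i);
    // After: a parallel FP induction variable makes the cast unnecessary.
    //   double d = 0.0;
    //   for (int i = 0; i < n; ++i, d += 1.0)
    //     use(d);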
-// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
-// uses in the loop, look to see if we can eliminate some, in favor of using
-// common indvars for the different uses.
+/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
+/// uses in the loop, look to see if we can eliminate some, in favor of using
+/// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
@@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitBlock = L->getExitingBlock();
- if (!ExitBlock)
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
 // Multiple exits; just look at the exit in the latch block, if there is one.
- ExitBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+ ExitingBlock = LatchBlock;
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
return;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
@@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
- if (ExitBlock != LatchBlock) {
+ if (ExitingBlock != LatchBlock) {
if (!Cond->hasOneUse())
// See below, we don't want the condition to be cloned.
return;
@@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
StrideNoReuse.insert(*CondStride);
}
- // If the trip count is computed in terms of an smax (due to ScalarEvolution
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT instead of NE.
- Cond = OptimizeSMax(L, Cond, CondUse);
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
- if (ExitBlock == LatchBlock)
+ if (ExitingBlock == LatchBlock)
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
@@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Clone the IVUse, as the old use still exists!
IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace(),
- false);
+ CondUse->getOperandValToReplace());
CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
}
}
@@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
++NumLoopCond;
}
-// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-// when to exit the loop is used only for that purpose, try to rearrange things
-// so it counts down to a test against zero.
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// If the number of times the loop is executed isn't computable, give up.
@@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
Value *startVal = phi->getIncomingValue(inBlock);
Value *endVal = Cond->getOperand(1);
// FIXME check for case where both are constant
- ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
+ Constant *Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal =
BinaryOperator::Create(Instruction::Sub, endVal, startVal,
"tmp", PreInsertPt);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7143c7be485e..d89790c29217 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy;
- if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
- ValTy = VTy->getElementType();
-
+ const Type *ValTy = EltTy->getScalarType();
+
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
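A worked instance of the surrounding logic (hedged; the replication step lives in code just past this hunk): for a memset of byte 0xAB into an alloca of <4 x i16>, getScalarType() yields i16, so EltSize is 16 and the per-element constant is the byte replicated across the element width:

    //   APInt OneVal(16, 0xAB);               // 0x00AB
    //   replicate: (OneVal << 8) | OneVal  -->  0xABAB for each i16 element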
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 59989c92d740..bbcb79255eef 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
TD->getIntPtrType(),
PointerType::getUnqual(Type::Int8Ty),
NULL);
- return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
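The same idiom repeats for each libcall emitted below; its general shape is (a sketch of the pattern, with B, Callee, and Arg standing for the builder, the looked-up callee, and its argument):

    // After creating a call through a possibly-bitcast callee, propagate the
    // callee's calling convention onto the new call so the two agree.
    CallInst *CI = B.CreateCall(Callee, Arg, "name");
    if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
      CI->setCallingConv(F->getCallingConv());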
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
@@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
PointerType::getUnqual(Type::Int8Ty),
Type::Int32Ty, TD->getIntPtrType(),
NULL);
- return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemCmp - Emit a call to the memcmp function.
@@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
PointerType::getUnqual(Type::Int8Ty),
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), NULL);
- return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemSet - Emit a call to the memset function
@@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
NameBuffer[NameLen+1] = 0;
Name = NameBuffer;
}
-
+
Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), NULL);
- return B.CreateCall(Callee, Op, Name);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty,
- Type::Int32Ty, NULL);
- B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar");
+ Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty,
+ Type::Int32Ty, NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char, Type::Int32Ty, "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
- Type::Int32Ty,
- PointerType::getUnqual(Type::Int8Ty), NULL);
- B.CreateCall(F, CastToCStr(Str, B), "puts");
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
@@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
if (isa<PointerType>(File->getType()))
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
Type::Int32Ty, File->getType(), NULL);
-
else
F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
File->getType(), NULL);
Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
- B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
 /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
@@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
F = M->getOrInsertFunction("fputs", Type::Int32Ty,
PointerType::getUnqual(Type::Int8Ty),
File->getType(), NULL);
- B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
@@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), TD->getIntPtrType(),
File->getType(), NULL);
- B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(), 1), File);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
//===----------------------------------------------------------------------===//
@@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 || Len2) {
- // Choose the smallest Len excluding 0 which means 'unknown'.
- if (!Len1 || (Len2 && Len2 < Len1))
- Len1 = Len2;
+ if (Len1 && Len2) {
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(), Len1), B);
+ ConstantInt::get(TD->getIntPtrType(),
+ std::min(Len1, Len2)), B);
}
return 0;
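Why min(Len1, Len2) suffices (a worked example; GetStringLength counts the terminating NUL):

    // strcmp("a", "ab"): Len1 = 2 ("a\0"), Len2 = 3 ("ab\0"), min = 2.
    // memcmp("a\0", "ab", 2) compares 'a' == 'a', then '\0' vs 'b'.
    // The shorter string's NUL lies inside the compared prefix, so the
    // result is always decided within min(Len1, Len2) bytes and agrees
    // in sign with strcmp.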
@@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
}
-
+
if (LdExpArg) {
const char *Name;
if (Op->getType() == Type::FloatTy)
@@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), Type::Int32Ty,NULL);
- return B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
return 0;
}
};
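For intuition, the rewrite this performs (hedged sketch): when the argument of exp2 is an integer widened to floating point, the call reduces to a load-exponent operation on 1.0:

    // exp2((double)i)  ==  ldexp(1.0, i)    // both compute 2^i
    // exp2f((float)i)  ==  ldexpf(1.0f, i)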
-
//===---------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
@@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
FT->getParamType(0) != Type::DoubleTy)
return 0;
-
+
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 682d069923e4..34ee57c9b9dc 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,6 +52,7 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
- if (I->mayHaveSideEffects() || isa<LoadInst>(I))
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
+
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Loads may always be moved above calls that have no side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L))
+ return false;
+ }
+ }
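A hedged, source-level illustration (hypothetical code) of what the relaxed rule permits: a non-volatile load between the recursive call and the return may now be hoisted above the call when the callee writes no memory and the load provably cannot trap.

    // sum() writes no memory, so the reload of *p after the recursive call
    // may move above it once *p is known not to trap, letting the call
    // become a true tail call (with the usual accumulator rewrite).
    int sum(int *p, int n) {
      if (n == 0)
        return 0;
      return sum(p, n - 1) + *p;
    }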
// Otherwise, if this is a side-effect free instruction, check to make sure
// that it does not use the return value of the call. If it doesn't use the
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 94483b816e3b..c7fff548bca7 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
@@ -28,6 +29,50 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// Local analysis.
+//
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
+ // If it is an alloca, it is always safe to load from.
+ if (isa<AllocaInst>(V)) return true;
+
+ // If it is a global variable, it is mostly safe to load from.
+ if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
+
+ // Otherwise, be a little bit aggressive by scanning the local block where
+ // we want to load, to see if the pointer is already being loaded from or
+ // stored to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might
+ // do a free), the pointer could be marked invalid.
+ if (isa<FreeInst>(BBI) ||
+ (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI)))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->getOperand(0) == V) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->getOperand(1) == V) return true;
+ }
+ }
+ return false;
+}
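A sketch of the scan's reasoning (hypothetical function; helper() is assumed not to write or free memory): a prior load or store of the same pointer in the block means the address was already dereferenced, so one more load cannot introduce a new trap.

    extern int helper(void);   // assumed readonly for this example

    int refine(int *p) {
      int a = *p;        // earlier access: if p were bad, this already trapped
      int b = helper();  // OK only because helper() may not write or free
      return a + b + *p; // so this load is safe to execute unconditionally
    }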
+
+
+//===----------------------------------------------------------------------===//
// Local constant propagation.
//
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 32498958e34f..9af47f56ef3d 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
const Type *AllocTy = MI->getType()->getElementType();
- // malloc(type) becomes sbyte *malloc(size)
+ // malloc(type) becomes i8 *malloc(size)
Value *MallocArg;
if (LowerMallocArgToInteger)
MallocArg = ConstantInt::get(Type::Int64Ty,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index bcc6b819013b..ee0f6a65de4e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
return Changed;
}
+/// isSafeToHoistInvoke - If we would need to insert a select that uses the
+/// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+/// would need to do this), we can't hoist the invoke, as there is nowhere
+/// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
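Concretely (a hedged illustration in pseudo-IR): hoisting identical invokes is blocked when a successor phi would force a select of the invoke's own value.

    //   BB1:   %v1 = invoke i32 @f() to label %Merge unwind label %U
    //   BB2:   %v2 = invoke i32 @f() to label %Merge unwind label %U
    //   Merge: %p = phi i32 [ %v1, %BB1 ], [ 0, %BB2 ]
    // Merging the identical invokes would require "%p = select %cond, %v1, 0"
    // in the branch block, but the hoisted invoke is that block's terminator,
    // so there is no insertion point for the select.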
+
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
@@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I1 = BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
I2 = BB2_Itr++;
- if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ !I1->isIdenticalTo(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
// If we get here, we can hoist at least one instruction.
@@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
return true;
HoistTerminator:
+ // Hoisting an invoke may not be possible; if it isn't, stop here but
+ // keep the instructions hoisted so far.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
BIParent->getInstList().insert(BI, NT);