author     Ed Schouten <ed@FreeBSD.org>    2009-06-22 08:08:12 +0000
committer  Ed Schouten <ed@FreeBSD.org>    2009-06-22 08:08:12 +0000
commit     b2f21fb044b6b5c52cff6227f9f79ca4ed42b18f (patch)
tree       86c1bc482baa6c81fc70b8d715153bfa93377186 /lib/Transforms
parent     600c6fa13de5c407dc36dbb0ab73807868741ae0 (diff)
download   src-b2f21fb044b6b5c52cff6227f9f79ca4ed42b18f.tar.gz
           src-b2f21fb044b6b5c52cff6227f9f79ca4ed42b18f.zip

Update LLVM sources to r73879. (tag: vendor/llvm/llvm-r73879)

Notes:
    svn path=/vendor/llvm/dist/; revision=194612
    svn path=/vendor/llvm/llvm-r73879/; revision=194614; tag=vendor/llvm/llvm-r73879
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                   |   1
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                    |   5
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp              |   5
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp             |   4
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp      |  12
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp            |   4
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                       |  29
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp            |  13
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp      | 394
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp             |  94
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp            |   4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp        | 151
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp      |   6
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp          |  93
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp  |  16
-rw-r--r--  lib/Transforms/Utils/Local.cpp                      |  45
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp           |   2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                |  29
18 files changed, 555 insertions, 352 deletions
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4b85e1388a68..1438b4879d2b 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMipo
LoopExtractor.cpp
LowerSetJmp.cpp
MergeFunctions.cpp
+ PartialInlining.cpp
PartialSpecialization.cpp
PruneEH.cpp
RaiseAllocations.cpp
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 9a1b29419077..cbf3a1d827a9 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
//
// NOTE: It doesn't make sense to promote non single-value types since we
// are just replacing static memory to stack memory.
+ //
+ // If the global is in different address space, don't bring it to stack.
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction && !GS.HasNonInstructionUser &&
GV->getType()->getElementType()->isSingleValueType() &&
GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage()) {
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
DOUT << "LOCALIZING GLOBAL: " << *GV;
Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
const Type* ElemTy = GV->getType()->getElementType();
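For intuition, the localization guarded above corresponds roughly to the following source-level rewrite; this is a C++ analogy of the effect, not the pass's actual mechanics, and it is only sound for ordinary address-space-0 memory that a stack slot can stand in for.

```cpp
#include <cstdio>

// Conceptually, GlobalOpt rewrites
//     static int g = 42;                // global touched only by main()
//     int main() { g++; ... }
// into the form below: the global becomes an alloca in main's entry
// block. A global in a non-zero address space cannot be replaced by
// stack memory this way, hence the new getAddressSpace() == 0 check.
int main() {
  int g = 42;  // the "localized" global now lives on main's stack
  g++;
  std::printf("%d\n", g);
  return 0;
}
```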
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index b3a2554039a1..0b975ae49979 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -20,10 +20,13 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
using namespace llvm;
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
namespace {
struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
@@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
duplicateFunction->replaceAllUsesWith(F);
duplicateFunction->eraseFromParent();
+ ++NumPartialInlined;
+
return extractedFunction;
}
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index a81bbdb3c53d..8c97b5d17c08 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) {
// Chck to see if we got the expected malloc
if (TyWeHave != Malloc1Type) {
- // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+ // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
// This handles the common declaration of: 'void *malloc(unsigned);'
const FunctionType *Malloc2Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
std::vector<const Type*>(1, Type::Int32Ty), false);
if (TyWeHave != Malloc2Type) {
// Check to see if the prototype is missing, giving us
- // sbyte*(...) * malloc
+ // i8*(...) * malloc
// This handles the common declaration of: 'void *malloc();'
const FunctionType *Malloc3Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index c6cf4dfd6ebf..b110f4eb368b 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -108,9 +108,9 @@ namespace {
class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
GlobalVariable* Counter;
Value* ResetValue;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounter();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -121,9 +121,9 @@ namespace {
GlobalVariable* Counter;
Value* ResetValue;
AllocaInst* AI;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounterOpt();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges);
// Methods of choosing when to profile
///////////////////////////////////////
-GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
uint64_t resetval) : T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
ResetValue = Init;
@@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
ReplacePhiPred(oldnext, bb, resetblock);
}
-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
uint64_t resetval)
: AI(0), T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 42978e753d16..e9bee6408fe3 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, int->uint, or
-/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 673d38b7f3ae..f4a989844478 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cstdio>
using namespace llvm;
@@ -48,7 +49,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
-cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].second.isClobber())
+ if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+ DEBUG(
+ DOUT << "GVN: non-local load ";
+ WriteAsOperand(*DOUT.stream(), LI);
+ DOUT << " is clobbered by " << *Deps[0].second.getInst();
+ );
return false;
+ }
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
@@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
+ bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
@@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
return false;
if (Blockers.count(TmpBB))
return false;
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ allSingleSucc = false;
}
assert(TmpBB);
@@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
<< UnavailablePred->getName() << "': " << *LI);
return false;
}
-
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can be 0,
+ // as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+ !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+ return false;
+
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
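In source terms, the hazard the comment above guards against: address arithmetic on a possibly-null pointer is harmless (a getelementptr never traps), but executing a load through it is not, so the load may only be moved past branching blocks when it is safe to execute unconditionally. A hedged C++ analogy, with the address computed as an integer to mirror the trap-free GEP:

```cpp
#include <cstdint>
#include <cstdio>

int readSecondField(int *p) {
  // Safe even when p == nullptr: this only computes an address,
  // just as getelementptr only does address arithmetic.
  uintptr_t addr = reinterpret_cast<uintptr_t>(p) + sizeof(int);
  if (p == nullptr)  // the "test p and branch if == 0" block
    return 0;
  // Hoisting this load above the null test would introduce a trap.
  return *reinterpret_cast<int *>(addr);
}

int main() {
  int buf[2] = {1, 2};
  std::printf("%d %d\n", readSecondField(buf), readSecondField(nullptr));
  return 0;
}
```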
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 38b11985519f..326fb38909b5 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -17,7 +17,10 @@
// which starts at zero and steps by one.
// 2. The canonical induction variable is guaranteed to be the first PHI node
// in the loop header block.
-// 3. Any pointer arithmetic recurrences are raised to use array subscripts.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
@@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// If this instruction is dead now, delete it.
RecursivelyDeleteTriviallyDeadInstructions(Inst);
- // See if this is a single-entry LCSSA PHI node. If so, we can (and
- // have to) remove
- // the PHI entirely. This is safe, because the NewVal won't be variant
+ // If we're inserting code into the exit block rather than the
+ // preheader, we can (and have to) remove the PHI entirely.
+ // This is safe, because the NewVal won't be variant
// in the loop, so we don't need an LCSSA phi node anymore.
- if (NumPreds == 1) {
+ if (ExitBlocks.size() == 1) {
PN->replaceAllUsesWith(ExitVal);
RecursivelyDeleteTriviallyDeadInstructions(PN);
break;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 5465e4a88466..5bd17e0737b2 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -390,7 +390,7 @@ namespace {
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ bool CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc, int &NumCastsRemoved);
unsigned GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign = 0);
@@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) {
}
/// AddOne - Add one to a ConstantInt
-static ConstantInt *AddOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(++Val);
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static ConstantInt *SubOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(--Val);
-}
-/// Add - Add two ConstantInts together
-static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() + C2->getValue());
-}
-/// And - Bitwise AND two ConstantInts together
-static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() & C2->getValue());
-}
-/// Subtract - Subtract one ConstantInt from another
-static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() - C2->getValue());
-}
-/// Multiply - Multiply two ConstantInts together
-static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() * C2->getValue());
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
/// MultiplyOverflows - True if the multiply can not be expressed in an int
/// this size.
@@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// SimplifyDemandedBits knows about. See if the instruction has any
/// properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth();
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
@@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
const Type *VTy = V->getType();
assert((TD || !isa<PointerType>(VTy)) &&
"SimplifyDemandedBits needs to know bit widths!");
- assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) &&
- (!isa<IntegerType>(VTy) ||
- VTy->getPrimitiveSizeInBits() == BitWidth) &&
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVector() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne \
- must have same BitWidth");
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
RHSKnownZero &= LHSKnownZero;
break;
case Instruction::Trunc: {
- unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.zext(truncBf);
RHSKnownZero.zext(truncBf);
RHSKnownOne.zext(truncBf);
@@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.trunc(SrcBitWidth);
RHSKnownZero.trunc(SrcBitWidth);
@@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
APInt InputDemandedBits = DemandedMask &
APInt::getLowBitsSet(BitWidth, SrcBitWidth);
@@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
- if (DemandedMask.ule(RA)) // srem won't affect demanded bits
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
return I->getOperand(0);
APInt LowBits = RA - 1;
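The ule → ult change above is subtle: for a power-of-two modulus RA = 2^k, srem (like C++'s truncating %) preserves the low k bits of its operand, so the operand can stand in for the instruction only when every demanded bit lies strictly below bit k. A standalone sketch with plain 32-bit ints rather than APInt:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int32_t RA = 8;  // power-of-two modulus, 2^3
  for (int32_t x = -64; x <= 64; ++x) {
    // DemandedMask u< RA (low 3 bits only): srem never changes them,
    // because x % 8 == x - 8*(x/8) and 8*(x/8) has zero low bits.
    assert(((x % RA) & 7) == (x & 7));
  }
  // DemandedMask == RA, which the old ule test accepted, is unsafe:
  // bit 3 of the remainder can differ from bit 3 of the operand.
  assert(((8 % RA) & 8) != (8 & 8));  // 8 % 8 == 0, but 8 has bit 3 set
  return 0;
}
```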
@@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// zext(i1) - 1 -> select i1, 0, -1
@@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *XorLHS = 0;
if (isa<ConstantInt>(RHSC) &&
match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
- uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
uint32_t Size = TySizeBits / 2;
@@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, Add(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
}
// X + X*C --> X * (C+1)
@@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = And(CRHS, C2);
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
if (Anded == CRHS) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
@@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// add (cast *A to intptrtype) B ->
- // cast (GEP (cast *A to sbyte*) B) --> intptrtype
+ // cast (GEP (cast *A to i8*) B) --> intptrtype
{
CastInst *CI = dyn_cast<CastInst>(LHS);
Value *Other = RHS;
@@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Other = LHS;
}
if (CI && CI->getType()->isSized() &&
- (CI->getType()->getPrimitiveSizeInBits() ==
+ (CI->getType()->getScalarSizeInBits() ==
TD->getIntPtrType()->getPrimitiveSizeInBits())
&& isa<PointerType>(CI->getOperand(0)->getType())) {
unsigned AS =
@@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(Subtract(CI1, CI2),
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2),
Op1I->getOperand(0));
}
}
@@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+ C2);
return BinaryOperator::CreateMul(Op0, CP1);
}
}
@@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::CreateMul(X, Subtract(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
return 0;
}
@@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (sdiv X, X) --> 1 (udiv X, X) --> 1
if (Op0 == Op1) {
if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
std::vector<Constant*> Elts(Ty->getNumElements(), CI);
return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
}
- ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ Constant *CI = ConstantInt::get(I.getType(), 1);
return ReplaceInstUsesWith(I, CI);
}
@@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
else
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- Multiply(RHS, LHSRHS));
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Value *X = Op->getOperand(0);
Constant *Together = 0;
if (!Op->isShift())
- Together = And(AndRHS, OpRHS);
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
case Instruction::Xor:
@@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
switch (LHSI->getOpcode()) {
default: return 0;
case Instruction::And:
- if (And(N, Mask) == Mask) {
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) ==
@@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && And(N, Mask)->isZero())
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
@@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
if (CP->isAllOnesValue()) // X & <-1,-1> -> X
return ReplaceInstUsesWith(I, I.getOperand(0));
@@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
}
}
-
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt& AndRHSMask = AndRHS->getValue();
APInt NotAndRHS(~AndRHSMask);
@@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
Val->getName()+".off");
InsertNewInstBefore(Add, I);
- AddCST = Subtract(AddOne(RHSCst), LHSCst);
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
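The AddCST rewrite just above turns two adjacent equality tests into one unsigned range check: (X == C | X == C+1) becomes ((X - C) u< 2), with the subtraction wrapping for X below C. A quick standalone verification of the identity:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // (X == 13 || X == 14)  <=>  (X - 13) u< 2  (AddCST = 14 + 1 - 13 = 2).
  for (int64_t i = -300; i <= 300; ++i) {
    uint32_t x = static_cast<uint32_t>(i);
    bool orOfEq = (x == 13u || x == 14u);
    bool range  = (x - 13u) < 2u;  // wraps to a huge value when x < 13
    assert(orOfEq == range);
  }
  return 0;
}
```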
@@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
+ if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+ if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
+ return ReplaceInstUsesWith(I, I.getOperand(1));
+ }
}
-
-
// or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
@@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
- }
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType()))
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
- Constant *CommonBits = And(Op0CI, RHS);
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
AddToWorkList(Op0I);
@@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return Changed ? &I : 0;
}
-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
-static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Add(In1, In2));
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().sgt(In1->getValue());
@@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ult(In1->getValue());
}
-/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
/// overflowed for this type.
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Subtract(In1, In2));
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().slt(In1->getValue());
@@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ugt(In1->getValue());
}
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
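The HasAddOverflow test above detects wrap purely from the already-wrapped result: for a signed add, adding a negative value must not move the result above In1, and adding a non-negative value must not move it below; for unsigned, any wrap makes the result smaller than In1. The same logic on plain 32-bit ints (a sketch, using unsigned arithmetic for well-defined wrap):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors HasAddOverflow: inspect the wrapped result against one operand.
static bool hasAddOverflow(int32_t result, int32_t in1, int32_t in2,
                           bool isSigned) {
  if (isSigned)
    return in2 < 0 ? result > in1 : result < in1;
  return static_cast<uint32_t>(result) < static_cast<uint32_t>(in1);
}

int main() {
  int32_t a = INT32_MAX, b = 1;
  int32_t wrapped = static_cast<int32_t>(
      static_cast<uint32_t>(a) + static_cast<uint32_t>(b));  // INT32_MIN
  assert(hasAddOverflow(wrapped, a, b, /*isSigned=*/true));   // signed wrap
  assert(!hasAddOverflow(wrapped, a, b, /*isSigned=*/false)); // no unsigned wrap
  return 0;
}
```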
/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
@@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// Check to see that the input is converted from an integer type that is small
// enough that preserves all bits. TODO: check here for "known" sign bits.
// This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
- unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
// If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
@@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// See if the FP constant is too large for the integer. For example,
// comparing an i8 to 300.0.
- unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
@@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty);
- else if (isa<IntegerType>(Ty))
- BitWidth = Ty->getPrimitiveSizeInBits();
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (Ty->isIntOrIntVector())
+ BitWidth = Ty->getScalarSizeInBits();
bool isSignBit = false;
@@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
// C2 (CI). By solving for X we can turn this into a range check
// instead of computing a divide.
- ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
@@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// overflow variable is set to 0 if it's corresponding bound variable is valid
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
- ConstantInt *LoBound = 0, *HiBound = 0;
+ Constant *LoBound = 0, *HiBound = 0;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
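As the [15, 20) comment says, the product computed above lets the compare be answered with a range check instead of a division: for unsigned values, X/5 == 3 holds exactly when 15 <= X < 20. A standalone spot check:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 100; ++x) {
    bool divForm   = (x / 5u == 3u);
    bool rangeForm = (x >= 15u && x < 20u);  // LoBound = 15, HiBound = 20
    assert(divForm == rangeForm);
  }
  return 0;
}
```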
@@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
if (BO->hasOneUse())
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Subtract(RHS, BOp1C));
+ ConstantExpr::getSub(RHS, BOp1C));
} else if (RHSV == 0) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
@@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (Res2 == CI) {
// Make sure that sign of the Cmp and the sign of the Cast are the same.
// For example, we might have:
- // %A = sext short %X to uint
- // %B = icmp ugt uint %A, 1330
+ // %A = sext i16 %X to i32
+ // %B = icmp ugt i32 %A, 1330
// It is incorrect to transform this into
- // %B = icmp ugt short %X, 1330
+ // %B = icmp ugt i16 %X, 1330
// because %A may have negative value.
//
// However, we allow this when the compare is EQ/NE, because they are
@@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isAllOnesValue())
return ReplaceInstUsesWith(I, CSI);
-
+
// See if we can turn a signed shr into an unsigned shr.
- if (!isa<VectorType>(I.getType())) {
- if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
- return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
- // Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0);
- if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
- return ReplaceInstUsesWith(I, Op0);
- }
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
return 0;
}
@@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
}
// See if we can fold away this shift.
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// Try to fold constant and into select arguments.
@@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
- // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr
- // of a signed value.
+ // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
+ // a signed shift.
//
if (Op1->uge(TypeBits)) {
if (I.getOpcode() != Instruction::AShr)
@@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// part of the register be zeros. Emulate this by inserting an AND to
// clear the top bits as needed. This 'and' will usually be zapped by
// other xforms later if dead.
- unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
// The mask we constructed says what the trunc would do if occurring
@@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If the allocation size is constant, form a constant mul expression
Amt = ConstantInt::get(Type::Int32Ty, Scale);
if (isa<ConstantInt>(NumElements))
- Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+ Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements),
+ cast<ConstantInt>(Amt));
// otherwise multiply the amount and the number of elements
else {
Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
@@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
/// bit computed in a larger type, which is then and'd or sext_in_reg'd to get
/// the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc,
int &NumCastsRemoved){
// We can always evaluate constants in another type.
- if (isa<ConstantInt>(V))
+ if (isa<Constant>(V))
return true;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+ const Type *OrigTy = V->getType();
// If this is an extension or truncate, we can often eliminate it.
if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t BitWidth = Ty->getBitWidth();
- if (BitWidth < OrigTy->getBitWidth() &&
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigTy->getScalarSizeInBits() &&
CI->getLimitedValue(BitWidth) < BitWidth)
return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
NumCastsRemoved);
@@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// lshr iff we know that the bits we would otherwise be shifting in are
// already zeros.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t OrigBitWidth = OrigTy->getBitWidth();
- uint32_t BitWidth = Ty->getBitWidth();
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
if (BitWidth < OrigBitWidth &&
MaskedValueIsZero(I->getOperand(0),
APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
@@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
const Type *SrcTy = Src->getType();
const Type *DestTy = CI.getType();
- uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
// See if we can simplify any instructions used by the LHS whose sole
// purpose is to compute bits we don't care about.
@@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) &&
- CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
+ (isSafeIntegerType(DestTy->getScalarType()) ||
+ !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluting in a different type always
// eliminates the cast, so it is always a win. If this is a zero-extension,
@@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Src = CI.getOperand(0);
const Type *Ty = CI.getType();
- uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
- uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+ uint32_t DestBitWidth = Ty->getScalarSizeInBits();
+ uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
- if (DestBitWidth == 1) {
+ if (DestBitWidth == 1 &&
+ isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
Constant *One = ConstantInt::get(Src->getType(), 1);
Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Optimize trunc(lshr(), c) to pull the shift through the truncate.
ConstantInt *ShAmtV = 0;
Value *ShiftOp = 0;
@@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
In->getName()+".lobit"),
CI);
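The ".lobit" shift built here turns a sign-bit test into data: logically shifting right by width-1 moves the sign bit into bit 0, so zext(icmp slt X, 0) becomes a single lshr. Illustrated with 32-bit values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t i = -1000; i <= 1000; ++i) {
    int32_t x = static_cast<int32_t>(i);
    // lshr x, 31: the sign bit lands in bit 0, yielding 0 or 1.
    uint32_t lobit = static_cast<uint32_t>(x) >> 31;
    assert(lobit == (x < 0 ? 1u : 0u));
  }
  return 0;
}
```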
@@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
- unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
- unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If we're actually extending zero bits, then if
// SrcSize < DstSize: zext(a & mask)
// SrcSize == DstSize: a & mask
// SrcSize > DstSize: trunc(a) & mask
if (SrcSize < DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(AndValue);
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
Instruction *And =
BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
InsertNewInstBefore(And, CI);
return new ZExtInst(And, CI.getType());
} else if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
} else if (SrcSize > DstSize) {
Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
InsertNewInstBefore(Trunc, CI);
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(),
+ AndValue));
}
}
@@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
+ InsertNewInstBefore(NewAnd, *And);
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
return 0;
}
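The new zext((trunc t) & C) → t & zext(C) fold added in this hunk is a pure bit identity: masking in the narrow type and widening with zeros gives the same bits as masking the wide value with the zero-extended mask. A quick check with u64/u32 standing in for the IR types:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0x00FF00FFu;
  const uint64_t samples[] = {0x0123456789ABCDEFull,
                              0xFFFFFFFFFFFFFFFFull, 42ull};
  for (uint64_t t : samples) {
    uint64_t before = static_cast<uint64_t>(
        static_cast<uint32_t>(t) & C);              // zext(trunc(t) & C)
    uint64_t after  = t & static_cast<uint64_t>(C); // t & zext(C)
    assert(before == after);
  }
  return 0;
}
```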
@@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// eliminate the trunc/sext pair.
if (getOpcode(Src) == Instruction::Trunc) {
Value *Op = cast<User>(Src)->getOperand(0);
- unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
- unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
- unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned OpBits = Op->getType()->getScalarSizeInBits();
+ unsigned MidBits = Src->getType()->getScalarSizeInBits();
+ unsigned DestBits = CI.getType()->getScalarSizeInBits();
unsigned NumSignBits = ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
BA == CA && isa<TruncInst>(A)) {
Value *I = cast<TruncInst>(A)->getOperand(0);
if (I->getType() == CI.getType()) {
- unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
- unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
@@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
// the cast, do this xform.
- if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
CI.getType(), CI);
RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
@@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
// 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() <=
+ (int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// trunc to be exposed to other transforms. Don't do this for extending
// ptrtoint's, because we don't know if the target sign or zero extends its
// pointers.
- if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
TD->getIntPtrType(),
"tmp"), CI);
@@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// allows the trunc to be exposed to other transforms. Don't do this for
// extending inttoptr's, because we don't know if the target sign or zero
// extends to pointers.
- if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() >
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
TD->getIntPtrType(),
@@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
(Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
In->getName()+".lobit"),
*ICI);
@@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// The comparison constant and the result are not neccessarily the
// same width. Make an all-ones value by inserting a AShr.
Value *X = IC->getOperand(0);
- uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+ uint32_t Bits = X->getType()->getScalarSizeInBits();
Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
ShAmt, "ones");
@@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
Instruction *InsertPoint,
InstCombiner *IC) {
- unsigned PtrSize = DTy->getPrimitiveSizeInBits();
- unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+ unsigned PtrSize = DTy->getScalarSizeInBits();
+ unsigned VTySize = V->getType()->getScalarSizeInBits();
// We must cast correctly to the pointer type. Ensure that we
// sign extend the integer value if it is smaller as this is
// used for address computation.
@@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
const Type *SrcTy = CI->getOperand(0)->getType();
// We can eliminate a cast from i32 to i64 iff the target
// is a 32-bit pointer target.
- if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+ if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
*i = CI->getOperand(0);
}
@@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
ConstantInt *Scale = 0;
if (ArrayEltSize == 1) {
NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(NewIdx->getType(), 1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
NewIdx = ConstantInt::get(CI->getType(), 1);
Scale = CI;
@@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
isa<ConstantInt>(Inst->getOperand(1))) {
ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
NewIdx = Inst->getOperand(0);
} else if (Inst->getOpcode() == Instruction::Mul &&
isa<ConstantInt>(Inst->getOperand(1))) {
@@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
return 0;
}
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
-static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
- // If it is an alloca it is always safe to load from.
- if (isa<AllocaInst>(V)) return true;
-
- // If it is a global variable it is mostly safe to load from.
- if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
- // Don't try to evaluate aliases. External weak GV can be null.
- return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
-
- // Otherwise, be a little bit agressive by scanning the local block where we
- // want to check to see if the pointer is already being loaded or stored
- // from/to. If so, the previous load or store would have already trapped,
- // so there is no harm doing an extra load (also, CSE will later eliminate
- // the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
-
- while (BBI != E) {
- --BBI;
-
- // If we see a free or a call (which might do a free) the pointer could be
- // marked invalid.
- if (isa<FreeInst>(BBI) ||
- (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)))
- return false;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->getOperand(1) == V) return true;
- }
-
- }
- return false;
-}
-
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c0ca2df1ce11..5a70fc3bc6f7 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -76,7 +76,7 @@ namespace {
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
unsigned JumpThreadCost);
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// This is important for things like "phi i1 [true, true, false, true, x]"
/// where we only need to clone the block for the true blocks once.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
SmallVector<BasicBlock*, 16> CommonPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == CstVal)
+ if (PN->getIncomingValue(i) == Val)
CommonPreds.push_back(PN->getIncomingBlock(i));
if (CommonPreds.size() == 1)
@@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
- // If there is only a single predecessor of this block, nothing to fold.
- if (BB->getSinglePredecessor())
- return false;
-
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0)
return false;
@@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getOpcode() == Instruction::And))
return true;
- // If we have "br (phi != 42)" and the phi node has any constant values as
- // operands, we can thread through this block.
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
- if (isa<PHINode>(CondCmp->getOperand(0)) &&
- isa<Constant>(CondCmp->getOperand(1)) &&
- ProcessBranchOnCompare(CondCmp, BB))
- return true;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ if (isa<PHINode>(CondCmp->getOperand(0))) {
+ // If we have "br (phi != 42)" and the phi node has any constant values
+ // as operands, we can thread through this block.
+ //
+ // If we have "br (cmp phi, x)" and the phi node contains x such that the
+ // comparison uniquely identifies the branch target, we can thread
+ // through this block.
+
+ if (ProcessBranchOnCompare(CondCmp, BB))
+ return true;
+ }
+
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with the same value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
+ }
+ }
+ }
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
@@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
}
+/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
+/// hand sides of the compare instruction, try to determine the result. If the
+/// result can not be determined, a null pointer is returned.
+static Constant *GetResultOfComparison(CmpInst::Predicate pred,
+ Value *LHS, Value *RHS) {
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::getCompare(pred, CLHS, CRHS);
+
+ if (LHS == RHS)
+ if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
+ return ICmpInst::isTrueWhenEqual(pred) ?
+ ConstantInt::getTrue() : ConstantInt::getFalse();
+
+ return 0;
+}
+
/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a constant. If the PHI node contains any constants as inputs, we
-/// can fold the compare for that edge and thread through it.
+/// node and a value. If we can identify when the comparison is true between
+/// the phi inputs and the value, we can fold the compare for that edge and
+/// thread through it.
bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+ Value *RHS = Cmp->getOperand(1);
// If the phi isn't in the current block, an incoming edge to this block
// doesn't control the destination.
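GetResultOfComparison above folds a compare of a value against itself only for integer and pointer types. The restriction matters because the identity fails for floating point: a NaN compares unequal to everything, itself included.

```cpp
#include <cassert>
#include <cmath>

int main() {
  double x = std::nan("");
  assert(!(x == x));  // the fcmp analogue of "LHS == RHS" is false for NaN
  assert(x != x);     // so only int/pointer self-compares can be folded
  return 0;
}
```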
@@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- Constant *PredCst = 0;
+ Value *PredVal = 0;
bool TrueDirection = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (PredCst == 0) continue;
+ PredVal = PN->getIncomingValue(i);
+
+ Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS);
+ if (!Res) {
+ PredVal = 0;
+ continue;
+ }
- Constant *Res;
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
- Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
- else
- Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
- PredCst, RHS);
// If this folded to a constant expr, we can't do anything.
if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
TrueDirection = ResC->getZExtValue();
@@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
}
// Otherwise, we can't fold this input.
- PredCst = 0;
+ PredVal = 0;
}
// If no match, bail out.
- if (PredCst == 0)
+ if (PredVal == 0)
return false;
// See if the cost of duplicating this block is low enough.
@@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// If so, we can actually do this threading. Merge any common predecessors
// that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
// Next, get our successor.
BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 9c785968e1d4..6f7a7f866a8e 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {
// Return V+1
static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
}
// Return V-1
static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 944f40931910..7579748bbc0a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -143,10 +143,10 @@ namespace {
 /// inside the loop then try to eliminate the cast operation.
void OptimizeShadowIV(Loop *L);
- /// OptimizeSMax - Rewrite the loop's terminating condition
- /// if it uses an smax computation.
- ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse);
+ /// OptimizeMax - Rewrite the loop's terminating condition
+ /// if it uses a max computation.
+ ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
@@ -336,13 +336,6 @@ namespace {
/// EmittedBase.
Value *OperandValToReplace;
- /// isSigned - The stride (and thus also the Base) of this use may be in
- /// a narrower type than the use itself (OperandValToReplace->getType()).
- /// When this is the case, the isSigned field indicates whether the
- /// IV expression should be signed-extended instead of zero-extended to
- /// fit the type of the use.
- bool isSigned;
-
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
@@ -363,7 +356,6 @@ namespace {
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- isSigned(IVSU.isSigned()),
Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
@@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
}
- if (isSigned)
- NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
- else
- NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
-
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
@@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
if (Val->isLoopInvariant(L)) return; // Nothing to do.
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
@@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
SCEVHandle Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
@@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
@@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
if (Start != SARE->getStart()) {
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
}
@@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
+static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs,
SCEVHandle Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
@@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
@@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
- std::vector<SCEVHandle> UniqueSubExprs;
+ SmallVector<SCEVHandle, 16> UniqueSubExprs;
- std::vector<SCEVHandle> SubExprs;
+ SmallVector<SCEVHandle, 16> SubExprs;
unsigned NumUsesInsideLoop = 0;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
@@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) {
return SC->getValue()->getValue().isNegative();
}
-// CollectIVUsers - Transform our list of users and offsets to a bit more
-// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-// of the strided accesses, as well as the old information from Uses. We
-// progressively move information from the Base field to the Imm field, until
-// we eventually have the full access expression to rewrite the use.
+/// CollectIVUsers - Transform our list of users and offsets to a bit more
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
+/// progressively move information from the Base field to the Imm field, until
+/// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
@@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
- ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+ Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
- SE->getConstant(ConstantInt::get(CmpTy, Scale)))
- : SE->getConstant(ConstantInt::get(NewCmpIntTy,
+ SE->getConstant(CmpTy, Scale))
+ : SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
- ->getSExtValue()*Scale));
+ ->getSExtValue()*Scale);
break;
}
}
@@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
+ IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
@@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
}
-/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
-/// an smax computation.
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// p[i] = 0.0;
/// } while (++i < n);
///
-/// where the comparison is signed, the trip count isn't just 'n', because
-/// 'n' could be negative. And unfortunately this can come up even for loops
-/// where the user didn't use a C do-while loop. For example, seemingly
-/// well-behaved top-test loops will commonly be lowered like this:
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
///
/// if (n > 0) {
/// i = 0;
@@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
-/// signed-max expression, which allows it to give the loop a canonical
+/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
-/// smax = n < 1 ? 1 : n;
+/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
-/// } while (++i != smax);
+/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
@@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
-ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse) {
+ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
@@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
- const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
- if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+ if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ return Cond;
+ const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
+ if (Max != SE->getSCEV(Sel)) return Cond;
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
- SCEVHandle SMaxLHS = SMax->getOperand(0);
- SCEVHandle SMaxRHS = SMax->getOperand(1);
- if (!SMaxLHS || SMaxLHS != One) return Cond;
+ SCEVHandle MaxLHS = Max->getOperand(0);
+ SCEVHandle MaxRHS = Max->getOperand(1);
+ if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
@@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
- else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ CmpInst::Predicate Pred =
+ isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
 // Ok, everything looks ok to change the condition into an SLT/ULT or
 // SGE/UGE and delete the max calculation.
ICmpInst *NewCond =
- new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
- CmpInst::ICMP_SLT :
- CmpInst::ICMP_SGE,
- Cond->getOperand(0), NewRHS, "scmp", Cond);
+ new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
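A quick worked mapping of the predicate selection above (a hedged summary, not additional code in the patch):

    // do { ... } while (++i != smax(1, n))  -->  while (++i slt n)
    // do { ... } while (++i != umax(1, n))  -->  while (++i ult n)
    // If the exit test used ICMP_EQ rather than ICMP_NE, the chosen
    // predicate is inverted: SLT becomes SGE, ULT becomes UGE.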
@@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
- ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
@@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
}
}
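At the source level, the shadow-IV rewrite corresponds roughly to the following (hypothetical C; the pass works on the IR form of the loop):

    // Before: the integer IV is converted to floating point on every trip.
    //   for (int i = 0; i < n; ++i)
    //     use((double)i);
    // After: a parallel FP induction variable makes the cast unnecessary.
    //   double d = 0.0;
    //   for (int i = 0; i < n; ++i, d += 1.0)
    //     use(d);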
-// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
-// uses in the loop, look to see if we can eliminate some, in favor of using
-// common indvars for the different uses.
+/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
+/// uses in the loop, look to see if we can eliminate some, in favor of using
+/// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
@@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitBlock = L->getExitingBlock();
- if (!ExitBlock)
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
 // Multiple exits; just look at the exit in the latch block, if there is one.
- ExitBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+ ExitingBlock = LatchBlock;
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
return;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
@@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
- if (ExitBlock != LatchBlock) {
+ if (ExitingBlock != LatchBlock) {
if (!Cond->hasOneUse())
// See below, we don't want the condition to be cloned.
return;
@@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
StrideNoReuse.insert(*CondStride);
}
- // If the trip count is computed in terms of an smax (due to ScalarEvolution
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT instead of NE.
- Cond = OptimizeSMax(L, Cond, CondUse);
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
- if (ExitBlock == LatchBlock)
+ if (ExitingBlock == LatchBlock)
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
@@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Clone the IVUse, as the old use still exists!
IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace(),
- false);
+ CondUse->getOperandValToReplace());
CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
}
}
@@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
++NumLoopCond;
}
-// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-// when to exit the loop is used only for that purpose, try to rearrange things
-// so it counts down to a test against zero.
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// If the number of times the loop is executed isn't computable, give up.
@@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
Value *startVal = phi->getIncomingValue(inBlock);
Value *endVal = Cond->getOperand(1);
// FIXME check for case where both are constant
- ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
+ Constant *Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal =
BinaryOperator::Create(Instruction::Sub, endVal, startVal,
"tmp", PreInsertPt);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7143c7be485e..d89790c29217 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy;
- if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
- ValTy = VTy->getElementType();
-
+ const Type *ValTy = EltTy->getScalarType();
+
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
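A worked instance of the surrounding logic (hedged; the replication step lives in code just past this hunk): for a memset of byte 0xAB into an alloca of <4 x i16>, getScalarType() yields i16, so EltSize is 16 and the per-element constant is the byte replicated across the element width:

    //   APInt OneVal(16, 0xAB);               // 0x00AB
    //   replicate: (OneVal << 8) | OneVal  -->  0xABAB for each i16 element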
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 59989c92d740..bbcb79255eef 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
TD->getIntPtrType(),
PointerType::getUnqual(Type::Int8Ty),
NULL);
- return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
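The same idiom repeats for each libcall emitted below; its general shape is (a sketch of the pattern, with B, Callee, and Arg standing for the builder, the looked-up callee, and its argument):

    // After creating a call through a possibly-bitcast callee, propagate the
    // callee's calling convention onto the new call so the two agree.
    CallInst *CI = B.CreateCall(Callee, Arg, "name");
    if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
      CI->setCallingConv(F->getCallingConv());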
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
@@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
PointerType::getUnqual(Type::Int8Ty),
Type::Int32Ty, TD->getIntPtrType(),
NULL);
- return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemCmp - Emit a call to the memcmp function.
@@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
PointerType::getUnqual(Type::Int8Ty),
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), NULL);
- return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemSet - Emit a call to the memset function
@@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
NameBuffer[NameLen+1] = 0;
Name = NameBuffer;
}
-
+
Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), NULL);
- return B.CreateCall(Callee, Op, Name);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty,
- Type::Int32Ty, NULL);
- B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar");
+ Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty,
+ Type::Int32Ty, NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char, Type::Int32Ty, "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
- Type::Int32Ty,
- PointerType::getUnqual(Type::Int8Ty), NULL);
- B.CreateCall(F, CastToCStr(Str, B), "puts");
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
@@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
if (isa<PointerType>(File->getType()))
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
Type::Int32Ty, File->getType(), NULL);
-
else
F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
File->getType(), NULL);
Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
- B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
 /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
@@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
F = M->getOrInsertFunction("fputs", Type::Int32Ty,
PointerType::getUnqual(Type::Int8Ty),
File->getType(), NULL);
- B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
@@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), TD->getIntPtrType(),
File->getType(), NULL);
- B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(), 1), File);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
//===----------------------------------------------------------------------===//
@@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 || Len2) {
- // Choose the smallest Len excluding 0 which means 'unknown'.
- if (!Len1 || (Len2 && Len2 < Len1))
- Len1 = Len2;
+ if (Len1 && Len2) {
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(), Len1), B);
+ ConstantInt::get(TD->getIntPtrType(),
+ std::min(Len1, Len2)), B);
}
return 0;
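Why min(Len1, Len2) suffices (a worked example; GetStringLength counts the terminating NUL):

    // strcmp("a", "ab"): Len1 = 2 ("a\0"), Len2 = 3 ("ab\0"), min = 2.
    // memcmp("a\0", "ab", 2) compares 'a' == 'a', then '\0' vs 'b'.
    // The shorter string's NUL lies inside the compared prefix, so the
    // result is always decided within min(Len1, Len2) bytes and agrees
    // in sign with strcmp.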
@@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
}
-
+
if (LdExpArg) {
const char *Name;
if (Op->getType() == Type::FloatTy)
@@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), Type::Int32Ty,NULL);
- return B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
return 0;
}
};
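For intuition, the rewrite this performs (hedged sketch): when the argument of exp2 is an integer widened to floating point, the call reduces to a load-exponent operation on 1.0:

    // exp2((double)i)  ==  ldexp(1.0, i)    // both compute 2^i
    // exp2f((float)i)  ==  ldexpf(1.0f, i)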
-
//===---------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
@@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
FT->getParamType(0) != Type::DoubleTy)
return 0;
-
+
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 682d069923e4..34ee57c9b9dc 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,6 +52,7 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
- if (I->mayHaveSideEffects() || isa<LoadInst>(I))
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
+
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Loads may always be moved above calls that have no side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L))
+ return false;
+ }
+ }
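A hedged, source-level illustration (hypothetical code) of what the relaxed rule permits: a non-volatile load between the recursive call and the return may now be hoisted above the call when the callee writes no memory and the load provably cannot trap.

    // sum() writes no memory, so the reload of *p after the recursive call
    // may move above it once *p is known not to trap, letting the call
    // become a true tail call (with the usual accumulator rewrite).
    int sum(int *p, int n) {
      if (n == 0)
        return 0;
      return sum(p, n - 1) + *p;
    }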
// Otherwise, if this is a side-effect free instruction, check to make sure
// that it does not use the return value of the call. If it doesn't use the
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 94483b816e3b..c7fff548bca7 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
@@ -28,6 +29,50 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// Local analysis.
+//
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
+ // If it is an alloca, it is always safe to load from.
+ if (isa<AllocaInst>(V)) return true;
+
+ // If it is a global variable, it is mostly safe to load from.
+ if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
+
+ // Otherwise, be a little bit aggressive by scanning the local block where
+ // we want to load, to see if the pointer is already being loaded from or
+ // stored to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might
+ // do a free), the pointer could be marked invalid.
+ if (isa<FreeInst>(BBI) ||
+ (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI)))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->getOperand(0) == V) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->getOperand(1) == V) return true;
+ }
+ }
+ return false;
+}
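A sketch of the scan's reasoning (hypothetical function; helper() is assumed not to write or free memory): a prior load or store of the same pointer in the block means the address was already dereferenced, so one more load cannot introduce a new trap.

    extern int helper(void);   // assumed readonly for this example

    int refine(int *p) {
      int a = *p;        // earlier access: if p were bad, this already trapped
      int b = helper();  // OK only because helper() may not write or free
      return a + b + *p; // so this load is safe to execute unconditionally
    }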
+
+
+//===----------------------------------------------------------------------===//
// Local constant propagation.
//
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 32498958e34f..9af47f56ef3d 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
const Type *AllocTy = MI->getType()->getElementType();
- // malloc(type) becomes sbyte *malloc(size)
+ // malloc(type) becomes i8 *malloc(size)
Value *MallocArg;
if (LowerMallocArgToInteger)
MallocArg = ConstantInt::get(Type::Int64Ty,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index bcc6b819013b..ee0f6a65de4e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
return Changed;
}
+/// isSafeToHoistInvoke - If we would need to insert a select that uses the
+/// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+/// would need to do this), we can't hoist the invoke, as there is nowhere
+/// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
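Concretely (a hedged illustration in pseudo-IR): hoisting identical invokes is blocked when a successor phi would force a select of the invoke's own value.

    //   BB1:   %v1 = invoke i32 @f() to label %Merge unwind label %U
    //   BB2:   %v2 = invoke i32 @f() to label %Merge unwind label %U
    //   Merge: %p = phi i32 [ %v1, %BB1 ], [ 0, %BB2 ]
    // Merging the identical invokes would require "%p = select %cond, %v1, 0"
    // in the branch block, but the hoisted invoke is that block's terminator,
    // so there is no insertion point for the select.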
+
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
@@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I1 = BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
I2 = BB2_Itr++;
- if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ !I1->isIdenticalTo(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
// If we get here, we can hoist at least one instruction.
@@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
return true;
HoistTerminator:
+ // Hoisting an invoke may not be possible; if it isn't, stop here but
+ // keep the instructions hoisted so far.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
BIParent->getInstList().insert(BI, NT);