aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 4
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 14
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 164
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/IPO.cpp | 1
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 1
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/Internalize.cpp | 5
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 5
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 1
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp | 357
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombine.h | 9
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 91
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 47
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 150
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 207
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 33
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 100
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 33
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 48
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 16
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 10
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h | 1
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 28
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 638
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp | 1
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h | 2
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 28
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp | 14
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp | 43
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h | 4
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 377
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 1
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 1
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 54
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp | 83
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 453
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 155
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 25
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 28
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 144
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 31
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 35
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 146
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 27
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 298
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 10
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 160
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 127
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 15
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 14
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 4
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 12
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 4
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 15
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Local.cpp | 110
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 7
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 49
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 15
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 215
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 3
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 4
69 files changed, 3036 insertions, 1690 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0c650cfe6440..54a7f679e01c 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -771,8 +771,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// function empty.
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
- // Loop over the argument list, transfering uses of the old arguments over to
- // the new arguments, also transfering over the names as well.
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well.
//
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
I2 = NF->arg_begin(); I != E; ++I) {
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index b42322116a98..d4eaf0c4a3ec 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -49,7 +49,7 @@ namespace {
/// Struct that represents (part of) either a return value or a function
/// argument. Used so that arguments and return values can be used
- /// interchangably.
+ /// interchangeably.
struct RetOrArg {
RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
IsArg(IsArg) {}
@@ -273,8 +273,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// function empty.
NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
- // Loop over the argument list, transfering uses of the old arguments over to
- // the new arguments, also transfering over the names as well. While we're at
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well. While we're at
// it, remove the dead arguments from the DeadArguments list.
//
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
@@ -294,7 +294,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
/// instead.
bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
{
- if (Fn.isDeclaration())
+ if (Fn.isDeclaration() || Fn.mayBeOverridden())
return false;
// Functions with local linkage should already have been handled.
@@ -379,7 +379,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
// The value is returned from a function. It's only live when the
// function's return value is live. We use RetValNum here, for the case
// that U is really a use of an insertvalue instruction that uses the
- // orginal Use.
+ // original Use.
RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
// We might be live, depending on the liveness of Use.
return MarkIfNotLive(Use, MaybeLiveUses);
@@ -894,8 +894,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// function empty.
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
- // Loop over the argument list, transfering uses of the old arguments over to
- // the new arguments, also transfering over the names as well.
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well.
i = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
I2 = NF->arg_begin(); I != E; ++I, ++i)
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d4cb71272f76..ded58aca75fc 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -21,6 +21,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -54,6 +55,7 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
struct GlobalStatus;
@@ -77,6 +79,7 @@ namespace {
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS);
+ bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
};
}
@@ -1191,9 +1194,11 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
const StructType *ST =
cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
- Result =
+ PHINode *NewPN =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getNumIncomingValues(),
PN->getName()+".f"+Twine(FieldNo), PN);
+ Result = NewPN;
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
} else {
llvm_unreachable("Unknown usable value");
@@ -1940,36 +1945,24 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
return Changed;
}
-/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
/// initializers have an init priority of 65535.
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (GV == 0) return 0;
- // Found it, verify it's an array of { int, void()* }.
- const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType());
- if (!ATy) return 0;
- const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
- if (!STy || STy->getNumElements() != 2 ||
- !STy->getElementType(0)->isIntegerTy(32)) return 0;
- const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
- if (!PFTy) return 0;
- const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
- if (!FTy || !FTy->getReturnType()->isVoidTy() ||
- FTy->isVarArg() || FTy->getNumParams() != 0)
- return 0;
-
// Verify that the initializer is simple enough for us to handle. We are
// only allowed to optimize the initializer if it is unique.
if (!GV->hasUniqueInitializer()) return 0;
-
- ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!CA) return 0;
-
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- ConstantStruct *CS = dyn_cast<ConstantStruct>(*i);
- if (CS == 0) return 0;
-
+ if (isa<ConstantAggregateZero>(*i))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
if (isa<ConstantPointerNull>(CS->getOperand(1)))
continue;
@@ -1978,8 +1971,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
return 0;
// Init priority must be standard.
- ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!CI || CI->getZExtValue() != 65535)
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
return 0;
}
@@ -1989,6 +1982,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function*>();
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
std::vector<Function*> Result;
Result.reserve(CA->getNumOperands());
@@ -2019,7 +2014,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
const PointerType *PFTy = PointerType::getUnqual(FTy);
CSVals[1] = Constant::getNullValue(PFTy);
CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
- 2147483647);
+ 0x7fffffff);
}
CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
@@ -2696,12 +2691,126 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
return Changed;
}
+/// FindCXAAtExit - Look up "__cxa_atexit" in the module and return it only if
+/// its prototype has the expected shape: an integer return type and exactly
+/// three parameters, each of pointer type.  Returns null if the function is
+/// absent or its prototype does not have that shape.
+static Function *FindCXAAtExit(Module &M) {
+  Function *AtExit = M.getFunction("__cxa_atexit");
+  if (AtExit == 0)
+    return 0;
+
+  const FunctionType *Proto = AtExit->getFunctionType();
+
+  // No attempt is made to match the full signature: the return type, the
+  // parameter count and "every parameter is a pointer" are deemed sufficient.
+  if (!Proto->getReturnType()->isIntegerTy() || Proto->getNumParams() != 3)
+    return 0;
+
+  for (unsigned Idx = 0; Idx != 3; ++Idx)
+    if (!Proto->getParamType(Idx)->isPointerTy())
+      return 0;
+
+  return AtExit;
+}
+
+/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
+/// destructor and can therefore be eliminated.
+///
+/// A function counts as empty when it is defined in this module, its
+/// definition cannot be overridden at link time, it consists of a single basic
+/// block, and every instruction before the first 'ret' is either a debug
+/// intrinsic or a direct call to a function that is itself empty by this same
+/// definition.
+///
+/// Note that we assume that other optimization passes have already simplified
+/// the code so we only look for a function with a single basic block, where
+/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor.
+///
+/// \param Fn               The candidate function.
+/// \param CalledFunctions  Callees already entered on the current call chain;
+///                         used to reject recursion.  It is copied before each
+///                         descent, so the caller's set is left unmodified.
+static bool cxxDtorIsEmpty(const Function &Fn,
+                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
+  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
+  // nounwind, but that doesn't seem worth doing.
+  if (Fn.isDeclaration())
+    return false;
+
+  // A definition that may be replaced by different code at link time (e.g.
+  // weak linkage) tells us nothing about what will actually run, so its body
+  // must not be used as proof of emptiness.
+  if (Fn.mayBeOverridden())
+    return false;
+
+  // Only single-block functions are considered.
+  if (++Fn.begin() != Fn.end())
+    return false;
+
+  const BasicBlock &EntryBlock = Fn.getEntryBlock();
+  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
+       I != E; ++I) {
+    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+      // Ignore debug intrinsics.
+      if (isa<DbgInfoIntrinsic>(CI))
+        continue;
+
+      const Function *CalledFn = CI->getCalledFunction();
+
+      // Indirect calls cannot be analyzed.
+      if (!CalledFn)
+        return false;
+
+      // Work on a copy so that only callees on the current call chain are
+      // tracked; sibling calls to the same empty function remain acceptable.
+      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
+
+      // Don't treat recursive functions as empty.
+      if (!NewCalledFunctions.insert(CalledFn))
+        return false;
+
+      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
+        return false;
+    } else if (isa<ReturnInst>(*I))
+      return true;
+    else
+      return false;
+  }
+
+  return false;
+}
+
+/// OptimizeEmptyGlobalCXXDtors - Remove direct calls to __cxa_atexit whose
+/// registered destructor is empty (see cxxDtorIsEmpty).  Each removed call has
+/// its result replaced by zero, the value __cxa_atexit returns on success.
+///
+/// \param CXAAtExitFn  The module's __cxa_atexit function.
+/// \returns true if at least one call was removed.
+bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+  // Itanium C++ ABI p3.3.5:
+  //
+  //   After constructing a global (or local static) object, that will require
+  //   destruction on exit, a termination function is registered as follows:
+  //
+  //   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
+  //
+  //   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
+  //   call f(p) when DSO d is unloaded, before all such termination calls
+  //   registered before this one. It returns zero if registration is
+  //   successful, nonzero on failure.
+
+  // This pass will look for calls to __cxa_atexit where the function is trivial
+  // and remove them.
+  bool Changed = false;
+
+  for (Function::use_iterator I = CXAAtExitFn->use_begin(),
+       E = CXAAtExitFn->use_end(); I != E;) {
+    // We're only interested in calls. Theoretically, we could handle invoke
+    // instructions as well, but neither llvm-gcc nor clang generates invokes
+    // to __cxa_atexit.
+    //
+    // The iterator is advanced here, before the call can be erased below.
+    CallInst *CI = dyn_cast<CallInst>(*I++);
+    if (!CI)
+      continue;
+
+    // The user must actually call __cxa_atexit.  A call to some other function
+    // that merely receives __cxa_atexit as an argument is not a registration
+    // and must be left alone.
+    if (CI->getCalledFunction() != CXAAtExitFn)
+      continue;
+
+    Function *DtorFn =
+      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
+    if (!DtorFn)
+      continue;
+
+    SmallPtrSet<const Function *, 8> CalledFunctions;
+    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+      continue;
+
+    // Just remove the call.
+    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    CI->eraseFromParent();
+
+    ++NumCXXDtorsRemoved;
+
+    Changed = true;
+  }
+
+  return Changed;
+}
+
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
// Try to find the llvm.globalctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
+ Function *CXAAtExitFn = FindCXAAtExit(M);
+
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -2718,6 +2827,11 @@ bool GlobalOpt::runOnModule(Module &M) {
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M);
+
+ // Try to remove trivial global destructors.
+ if (CXAAtExitFn)
+ LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+
Changed |= LocalChange;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index c7c293987a58..25c01346642b 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -186,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
// Find the returned value
Value *V;
if (!STy)
- V = RI->getOperand(i);
+ V = RI->getOperand(0);
else
V = FindInsertedValue(RI->getOperand(0), i);
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index fbe90ce67591..21dcb519d9c9 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -45,7 +45,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeStripDebugDeclarePass(Registry);
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
- initializeSRETPromotionPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 37eafd723bf8..57f3e772b569 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -29,7 +29,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include <set>
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 9b9ebad47225..7cb1d18f933d 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -126,6 +126,8 @@ bool InternalizePass::runOnModule(Module &M) {
// FIXME: maybe use private linkage?
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration() && // Function must be defined here
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
!I->hasLocalLinkage() && // Can't already have internal linkage
!ExternalNames.count(I->getName())) {// Not marked to keep external?
I->setLinkage(GlobalValue::InternalLinkage);
@@ -144,9 +146,6 @@ bool InternalizePass::runOnModule(Module &M) {
// Never internalize anchors used by the machine module info, else the info
// won't find them. (see MachineModuleInfo.)
- ExternalNames.insert("llvm.dbg.compile_units");
- ExternalNames.insert("llvm.dbg.global_variables");
- ExternalNames.insert("llvm.dbg.subprograms");
ExternalNames.insert("llvm.global_ctors");
ExternalNames.insert("llvm.global_dtors");
ExternalNames.insert("llvm.noinline");
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
index b545f0bb267d..52ecf17b8f9b 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -430,7 +430,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
// This PHI node will be in the new block created from the
// splitBasicBlock call.
- PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()),
+ PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2,
"SetJmpReturn", Inst);
// Coming from a call to setjmp, the return is 0.
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index cccffca6e384..f74144338a61 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -55,6 +55,7 @@
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
@@ -125,7 +126,7 @@ private:
const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
const ComparableFunction ComparableFunction::TombstoneKey =
ComparableFunction(1);
-TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1);
+TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1);
}
@@ -212,7 +213,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
return false;
}
- switch(Ty1->getTypeID()) {
+ switch (Ty1->getTypeID()) {
default:
llvm_unreachable("Unknown type!");
// Fall through in Release mode.
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 2afd02985764..d9d1d106111e 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -95,7 +95,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
PHINode* OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi) break;
- PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins);
+ PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
OldPhi->replaceAllUsesWith(retPhi);
Ins = newReturnBlock->getFirstNonPHI();
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index d91c2c403aae..9470180c5657 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
-#include <set>
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
deleted file mode 100644
index 584deacaff1b..000000000000
--- a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass finds functions that return a struct (using a pointer to the struct
-// as the first argument of the function, marked with the 'sret' attribute) and
-// replaces them with a new function that simply returns each of the elements of
-// that struct (using multiple return values).
-//
-// This pass works under a number of conditions:
-// 1. The returned struct must not contain other structs
-// 2. The returned struct must only be used to load values from
-// 3. The placeholder struct passed in is the result of an alloca
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sretpromotion"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses");
-STATISTIC(NumSRET , "Number of sret promoted");
-namespace {
- /// SRETPromotion - This pass removes sret parameter and updates
- /// function to use multiple return value.
- ///
- struct SRETPromotion : public CallGraphSCCPass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnSCC(CallGraphSCC &SCC);
- static char ID; // Pass identification, replacement for typeid
- SRETPromotion() : CallGraphSCCPass(ID) {
- initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- CallGraphNode *PromoteReturn(CallGraphNode *CGN);
- bool isSafeToUpdateAllCallers(Function *F);
- Function *cloneFunctionBody(Function *F, const StructType *STy);
- CallGraphNode *updateCallSites(Function *F, Function *NF);
- };
-}
-
-char SRETPromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
- "Promote sret arguments to multiple ret values", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
-INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
- "Promote sret arguments to multiple ret values", false, false)
-
-Pass *llvm::createStructRetPromotionPass() {
- return new SRETPromotion();
-}
-
-bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) {
- bool Changed = false;
-
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- if (CallGraphNode *NewNode = PromoteReturn(*I)) {
- SCC.ReplaceNode(*I, NewNode);
- Changed = true;
- }
-
- return Changed;
-}
-
-/// PromoteReturn - This method promotes function that uses StructRet paramater
-/// into a function that uses multiple return values.
-CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
- Function *F = CGN->getFunction();
-
- if (!F || F->isDeclaration() || !F->hasLocalLinkage())
- return 0;
-
- // Make sure that function returns struct.
- if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
- return 0;
-
- DEBUG(dbgs() << "SretPromotion: Looking at sret function "
- << F->getName() << "\n");
-
- assert(F->getReturnType()->isVoidTy() && "Invalid function return type");
- Function::arg_iterator AI = F->arg_begin();
- const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
- assert(FArgType && "Invalid sret parameter type");
- const llvm::StructType *STy =
- dyn_cast<StructType>(FArgType->getElementType());
- assert(STy && "Invalid sret parameter element type");
-
- // Check if it is ok to perform this promotion.
- if (isSafeToUpdateAllCallers(F) == false) {
- DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
- ++NumRejectedSRETUses;
- return 0;
- }
-
- DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
- ++NumSRET;
- // [1] Replace use of sret parameter
- AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",
- F->getEntryBlock().begin());
- Value *NFirstArg = F->arg_begin();
- NFirstArg->replaceAllUsesWith(TheAlloca);
-
- // [2] Find and replace ret instructions
- for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
- for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
- Instruction *I = BI;
- ++BI;
- if (isa<ReturnInst>(I)) {
- Value *NV = new LoadInst(TheAlloca, "mrv.ld", I);
- ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I);
- I->replaceAllUsesWith(NR);
- I->eraseFromParent();
- }
- }
-
- // [3] Create the new function body and insert it into the module.
- Function *NF = cloneFunctionBody(F, STy);
-
- // [4] Update all call sites to use new function
- CallGraphNode *NF_CFN = updateCallSites(F, NF);
-
- CallGraph &CG = getAnalysis<CallGraph>();
- NF_CFN->stealCalledFunctionsFrom(CG[F]);
-
- delete CG.removeFunctionFromModule(F);
- return NF_CFN;
-}
-
-// Check if it is ok to perform this promotion.
-bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
-
- if (F->use_empty())
- // No users. OK to modify signature.
- return true;
-
- for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end();
- FnUseI != FnUseE; ++FnUseI) {
- // The function is passed in as an argument to (possibly) another function,
- // we can't change it!
- CallSite CS(*FnUseI);
- Instruction *Call = CS.getInstruction();
- // The function is used by something else than a call or invoke instruction,
- // we can't change it!
- if (!Call || !CS.isCallee(FnUseI))
- return false;
- CallSite::arg_iterator AI = CS.arg_begin();
- Value *FirstArg = *AI;
-
- if (!isa<AllocaInst>(FirstArg))
- return false;
-
- // Check FirstArg's users.
- for (Value::use_iterator ArgI = FirstArg->use_begin(),
- ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) {
- User *U = *ArgI;
- // If FirstArg user is a CallInst that does not correspond to current
- // call site then this function F is not suitable for sret promotion.
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- if (CI != Call)
- return false;
- }
- // If FirstArg user is a GEP whose all users are not LoadInst then
- // this function F is not suitable for sret promotion.
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // TODO : Use dom info and insert PHINodes to collect get results
- // from multiple call sites for this GEP.
- if (GEP->getParent() != Call->getParent())
- return false;
- for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end();
- GEPI != GEPE; ++GEPI)
- if (!isa<LoadInst>(*GEPI))
- return false;
- }
- // Any other FirstArg users make this function unsuitable for sret
- // promotion.
- else
- return false;
- }
- }
-
- return true;
-}
-
-/// cloneFunctionBody - Create a new function based on F and
-/// insert it into module. Remove first argument. Use STy as
-/// the return type for new function.
-Function *SRETPromotion::cloneFunctionBody(Function *F,
- const StructType *STy) {
-
- const FunctionType *FTy = F->getFunctionType();
- std::vector<const Type*> Params;
-
- // Attributes - Keep track of the parameter attributes for the arguments.
- SmallVector<AttributeWithIndex, 8> AttributesVec;
- const AttrListPtr &PAL = F->getAttributes();
-
- // Add any return attributes.
- if (Attributes attrs = PAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
-
- // Skip first argument.
- Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- ++I;
- // 0th parameter attribute is reserved for return type.
- // 1th parameter attribute is for first 1st sret argument.
- unsigned ParamIndex = 2;
- while (I != E) {
- Params.push_back(I->getType());
- if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
- AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
- ++I;
- ++ParamIndex;
- }
-
- // Add any fn attributes.
- if (Attributes attrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
-
-
- FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg());
- Function *NF = Function::Create(NFTy, F->getLinkage());
- NF->takeName(F);
- NF->copyAttributesFrom(F);
- NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
- F->getParent()->getFunctionList().insert(F, NF);
- NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
-
- // Replace arguments
- I = F->arg_begin();
- E = F->arg_end();
- Function::arg_iterator NI = NF->arg_begin();
- ++I;
- while (I != E) {
- I->replaceAllUsesWith(NI);
- NI->takeName(I);
- ++I;
- ++NI;
- }
-
- return NF;
-}
-
-/// updateCallSites - Update all sites that call F to use NF.
-CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
- CallGraph &CG = getAnalysis<CallGraph>();
- SmallVector<Value*, 16> Args;
-
- // Attributes - Keep track of the parameter attributes for the arguments.
- SmallVector<AttributeWithIndex, 8> ArgAttrsVec;
-
- // Get a new callgraph node for NF.
- CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
-
- while (!F->use_empty()) {
- CallSite CS(*F->use_begin());
- Instruction *Call = CS.getInstruction();
-
- const AttrListPtr &PAL = F->getAttributes();
- // Add any return attributes.
- if (Attributes attrs = PAL.getRetAttributes())
- ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs));
-
- // Copy arguments, however skip first one.
- CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- Value *FirstCArg = *AI;
- ++AI;
- // 0th parameter attribute is reserved for return type.
- // 1th parameter attribute is for first 1st sret argument.
- unsigned ParamIndex = 2;
- while (AI != AE) {
- Args.push_back(*AI);
- if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
- ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
- ++ParamIndex;
- ++AI;
- }
-
- // Add any function attributes.
- if (Attributes attrs = PAL.getFnAttributes())
- ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs));
-
- AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end());
-
- // Build new call instruction.
- Instruction *New;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(), "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(NewPAL);
- } else {
- New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(NewPAL);
- if (cast<CallInst>(Call)->isTailCall())
- cast<CallInst>(New)->setTailCall();
- }
- Args.clear();
- ArgAttrsVec.clear();
- New->takeName(Call);
-
- // Update the callgraph to know that the callsite has been transformed.
- CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->removeCallEdgeFor(Call);
- CalleeNode->addCalledFunction(New, NF_CGN);
-
- // Update all users of sret parameter to extract value using extractvalue.
- for (Value::use_iterator UI = FirstCArg->use_begin(),
- UE = FirstCArg->use_end(); UI != UE; ) {
- User *U2 = *UI++;
- CallInst *C2 = dyn_cast<CallInst>(U2);
- if (C2 && (C2 == Call))
- continue;
-
- GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2);
- ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2));
- Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
- "evi", UGEP);
- while(!UGEP->use_empty()) {
- // isSafeToUpdateAllCallers has checked that all GEP uses are
- // LoadInsts
- LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
- L->replaceAllUsesWith(GR);
- L->eraseFromParent();
- }
- UGEP->eraseFromParent();
- continue;
- }
- Call->eraseFromParent();
- }
-
- return NF_CGN;
-}
-
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 9c2969c7ab22..9c70cf89e48c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,7 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/IRBuilder.h"
@@ -69,7 +70,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
- bool MustPreserveLCSSA;
bool MadeIRChange;
public:
/// Worklist - All of the instructions that need to be simplified.
@@ -217,8 +217,8 @@ private:
Instruction *transformCallThroughTrampoline(CallSite CS);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
+ Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
- DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
Value *EmitGEPOffset(User *GEP);
public:
@@ -247,7 +247,10 @@ public:
// segment of unreachable code, so just clobber the instruction.
if (&I == V)
V = UndefValue::get(I.getType());
-
+
+ DEBUG(errs() << "IC: Replacing " << I << "\n"
+ " with " << *V << '\n');
+
I.replaceAllUsesWith(V);
return &I;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7986d1aca762..a08446e5d519 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Intrinsics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -330,7 +331,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient
+/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat the V, Lo and HI as signed or not. IB is the location to
/// insert new instructions.
@@ -755,6 +756,54 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
+
+ // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
+ if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+ Value *NewAnd = Builder->CreateAnd(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+ }
+
+ // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
+ if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+ // where CMAX is the all ones value for the truncated type, iff the lower
+ // bits of C2 and CA are zero. Only valid for EQ: "!= & !=" is not one "!=".
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
+ LHS->hasOneUse() && RHS->hasOneUse()) {
+ Value *V;
+ ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+
+ // Val2 is the trunc of V and Val is the masked V.
+ if (match(Val2, m_Trunc(m_Value(V))) &&
+ match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = RHSCst;
+ BigCst = LHSCst;
+ }
+ // Val is the trunc of V and Val2 is the masked V.
+ else if (match(Val, m_Trunc(m_Value(V))) &&
+ match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = LHSCst;
+ BigCst = RHSCst;
+ }
+
+ if (SmallCst && BigCst) {
+ unsigned BigBitSize = BigCst->getType()->getBitWidth();
+ unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+ // Check that the low bits (those covered by the trunc) are zero.
+ APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+ if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+ Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+ APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+ Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+ return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+ }
+ }
}
// From here on, we only handle:
@@ -767,7 +816,17 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
return 0;
-
+
+ // Make a constant range that's the intersection of the two icmp ranges.
+ // If the intersection is empty, we know that the result is false.
+ ConstantRange LHSRange =
+ ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange RHSRange =
+ ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+
+ if (LHSRange.intersectWith(RHSRange).isEmptySet())
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+
// We can't fold (ugt x, C) & (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
return 0;
@@ -800,10 +859,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_EQ:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
- case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
- case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
@@ -851,9 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SLT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
- case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
@@ -1438,6 +1490,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
+
+ // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
+ if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+ Value *NewAnd = Builder->CreateAnd(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+ }
}
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
@@ -1975,7 +2039,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
}
-
+
+ // or(sext(A), B) -> A ? -1 : B where A is an i1
+ // or(A, sext(B)) -> B ? -1 : A where B is an i1
+ if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
// Note: If we've gotten to the point of visiting the outer OR, then the
// inner one couldn't be simplified. If it was a constant, then it won't
// be simplified by a later pass either, so we try swapping the inner/outer
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0e464507a7e4..726105f75d6f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -475,7 +475,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
break;
- case Intrinsic::umul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // If multiplying the maximum values does not overflow then we can turn
+ // this into a plain NUW mul.
+ bool Overflow;
+ LHSMax.umul_ov(RHSMax, Overflow);
+ if (!Overflow) {
+ Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ Builder->getFalse()
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, Mul, 0);
+ }
+ } // FALL THROUGH
case Intrinsic::smul_with_overflow:
// Canonicalize constants into the RHS.
if (isa<Constant>(II->getArgOperand(0)) &&
@@ -508,11 +537,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse2_loadu_dq:
- // Turn PPC lvx -> load if the pointer is known aligned.
- // Turn X86 loadups -> load if the pointer is known aligned.
+ // Turn PPC lvx -> load if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -731,9 +756,13 @@ protected:
dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
if (SizeCI->isAllOnesValue())
return true;
- if (isString)
- return SizeCI->getZExtValue() >=
- GetStringLength(CI->getArgOperand(SizeArgOp));
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
if (ConstantInt *Arg = dyn_cast<ConstantInt>(
CI->getArgOperand(SizeArgOp)))
return SizeCI->getZExtValue() >= Arg->getZExtValue();
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b432641a1403..6f70de865764 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -87,10 +87,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If the allocation has multiple uses, only promote it if we are strictly
// increasing the alignment of the resultant allocation. If we keep it the
- // same, we open the door to infinite loops of various kinds. (A reference
- // from a dbg.declare doesn't count as a use for this purpose.)
- if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
- CastElTyAlign == AllocElTyAlign) return 0;
+ // same, we open the door to infinite loops of various kinds.
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
@@ -128,15 +126,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
- // If the allocation has one real use plus a dbg.declare, just remove the
- // declare.
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
- EraseInstFromFunction(*(Instruction*)DI);
- }
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
// will die soon.
- else if (!AI.hasOneUse()) {
+ if (!AI.hasOneUse()) {
// New is the allocation instruction, pointer typed. AI is the original
// allocation instruction, also pointer typed. Thus, cast to use is BitCast.
Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
@@ -203,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
}
case Instruction::PHI: {
PHINode *OPN = cast<PHINode>(I);
- PHINode *NPN = PHINode::Create(Ty);
+ PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
@@ -883,6 +876,102 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return 0;
}
+/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp. Returns 0 when no transform applies.
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+ Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // (x <s 0) ? -1 : 0 -> ashr x, bitwidth-1 -> all ones if negative
+ // (x >s -1) ? -1 : 0 -> not (ashr x, bitwidth-1) -> all ones if non-negative
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+ Value *Sh = ConstantInt::get(Op0->getType(),
+ Op0->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+ // If we know that only one bit of the LHS of the icmp can be set and we
+ // have an equality comparison with zero or a power of 2, we can transform
+ // the icmp and sext into bitwise/integer operations.
+ if (ICI->hasOneUse() &&
+ ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+ unsigned BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) {
+ Value *In = ICI->getOperand(0);
+
+ // Op0 is 0 or KnownZeroMask, so a compare with any other power of 2 folds.
+ if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+ Value *V = Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getAllOnesValue(CI.getType()) :
+ ConstantInt::getNullValue(CI.getType());
+ return ReplaceInstUsesWith(CI, V);
+ }
+
+ if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+ // sext ((x & 2^n) == 0) -> (x >> n) - 1
+ // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+ unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ // Perform a right shift to place the desired bit in the LSB.
+ if (ShiftAmt)
+ In = Builder->CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // At this point "In" is either 1 or 0. Adding all-ones subtracts 1,
+ // turning {1, 0} -> {0, -1}.
+ In = Builder->CreateAdd(In,
+ ConstantInt::getAllOnesValue(In->getType()),
+ "sext");
+ } else {
+ // sext ((x & 2^n) != 0) -> (x << (bitwidth-1-n)) a>> (bitwidth-1)
+ // sext ((x & 2^n) == 2^n) -> (x << (bitwidth-1-n)) a>> (bitwidth-1)
+ unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ // Perform a left shift to place the desired bit in the MSB.
+ if (ShiftAmt)
+ In = Builder->CreateShl(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // Distribute the bit over the whole bit width.
+ In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+ BitWidth - 1), "sext");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+ }
+ }
+ }
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, eltbits-1 -> all ones if negative.
+ if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+ if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
+ Op0->getType() == CI.getType()) {
+ const Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant (element width - 1) to a constant vector.
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+
+ return 0;
+}
+
/// CanEvaluateSExtd - Return true if we can take the specified value
/// and return it as type Ty without inserting any new casts and without
/// changing the value of the common low bits. This is used by code that tries
@@ -1006,44 +1095,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
return BinaryOperator::CreateAShr(Res, ShAmt);
}
-
-
- // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed
- // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed
- {
- ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS;
- if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) {
- // sext (x <s 0) to i32 --> x>>s31 true if signbit set.
- // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear.
- if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) ||
- (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) {
- Value *Sh = ConstantInt::get(CmpLHS->getType(),
- CmpLHS->getType()->getScalarSizeInBits()-1);
- Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit");
- if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
-
- if (Pred == ICmpInst::ICMP_SGT)
- In = Builder->CreateNot(In, In->getName()+".not");
- return ReplaceInstUsesWith(CI, In);
- }
- }
- }
- // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
- if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
- ICmpInst::Predicate Pred; Value *CmpLHS;
- if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
- if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
- const Type *EltTy = VTy->getElementType();
-
- // splat the shift constant to a constant vector.
- Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
- Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
- return ReplaceInstUsesWith(CI, In);
- }
- }
- }
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformSExtICmp(ICI, CI);
// If the input is a shl/ashr pair of a same constant, then this is a sign
// extension from a smaller value. If we could trust arbitrary bitwidth
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 999de3409750..bb9b88bfe6a7 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -699,7 +699,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
- // so the values can never be equal. Similiarly for all other "or equals"
+ // so the values can never be equal. Similarly for all other "or equals"
// operators.
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
@@ -1289,13 +1289,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
- case Instruction::AShr:
- // Only handle equality comparisons of shift-by-constant.
- if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
- if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI),
- ShAmt))
+ case Instruction::AShr: {
+ // Handle equality comparisons of shift-by-constant.
+ BinaryOperator *BO = cast<BinaryOperator>(LHSI);
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
return Res;
+ }
+
+ // Handle exact shr's.
+ if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
+ if (RHSV.isMinValue())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+ }
break;
+ }
case Instruction::SDiv:
case Instruction::UDiv:
@@ -1376,9 +1384,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (Value *NegVal = dyn_castNegVal(BOp1))
return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
- else if (Value *NegVal = dyn_castNegVal(BOp0))
+ if (Value *NegVal = dyn_castNegVal(BOp0))
return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
- else if (BO->hasOneUse()) {
+ if (BO->hasOneUse()) {
Value *Neg = Builder->CreateNeg(BOp1);
Neg->takeName(BO);
return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
@@ -1855,11 +1863,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
ConstantInt::get(CI->getContext(), CI->getValue()+1));
case ICmpInst::ICMP_UGE:
- assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
ConstantInt::get(CI->getContext(), CI->getValue()-1));
case ICmpInst::ICMP_SGE:
- assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
ConstantInt::get(CI->getContext(), CI->getValue()-1));
}
@@ -1907,18 +1915,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// that code below can assume that Min != Max.
if (!isa<Constant>(Op0) && Op0Min == Op0Max)
return new ICmpInst(I.getPredicate(),
- ConstantInt::get(I.getContext(), Op0Min), Op1);
+ ConstantInt::get(Op0->getType(), Op0Min), Op1);
if (!isa<Constant>(Op1) && Op1Min == Op1Max)
return new ICmpInst(I.getPredicate(), Op0,
- ConstantInt::get(I.getContext(), Op1Min));
+ ConstantInt::get(Op1->getType(), Op1Min));
// Based on the range information we know about the LHS, see if we can
- // simplify this comparison. For example, (x&4) < 8 is always true.
+ // simplify this comparison. For example, (x&4) < 8 is always true.
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown icmp opcode!");
case ICmpInst::ICMP_EQ: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
// If all bits are known zero except for one, then we know at most one
// bit is set. If the comparison is against zero, then this is a check
@@ -1955,7 +1963,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case ICmpInst::ICMP_NE: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
// If all bits are known zero except for one, then we know at most one
// bit is set. If the comparison is against zero, then this is a check
@@ -1992,9 +2000,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case ICmpInst::ICMP_ULT:
if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
@@ -2010,9 +2018,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
break;
case ICmpInst::ICMP_UGT:
if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
@@ -2029,9 +2037,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
break;
case ICmpInst::ICMP_SLT:
if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
@@ -2042,9 +2050,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
break;
case ICmpInst::ICMP_SGT:
if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
@@ -2057,30 +2065,30 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_SGE:
assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
break;
case ICmpInst::ICMP_SLE:
assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
break;
case ICmpInst::ICMP_UGE:
assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
break;
case ICmpInst::ICMP_ULE:
assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
break;
}
@@ -2306,6 +2314,35 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
BO0->hasOneUse() && BO1->hasOneUse())
return new ICmpInst(Pred, D, B);
+ BinaryOperator *SRem = NULL;
+ // icmp (srem X, Y), Y
+ if (BO0 && BO0->getOpcode() == Instruction::SRem &&
+ Op1 == BO0->getOperand(1))
+ SRem = BO0;
+ // icmp Y, (srem X, Y)
+ else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+ Op0 == BO1->getOperand(1))
+ SRem = BO1;
+ if (SRem) {
+ // We don't check hasOneUse to avoid increasing register pressure because
+ // the value we use is the same value this instruction was already using.
+ switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case ICmpInst::ICMP_NE:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+ Constant::getAllOnesValue(SRem->getType()));
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+ Constant::getNullValue(SRem->getType()));
+ }
+ }
+
if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
BO0->hasOneUse() && BO1->hasOneUse() &&
BO0->getOperand(1) == BO1->getOperand(1)) {
@@ -2356,6 +2393,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ // An unsigned division/shift does not preserve signed ordering.
+ if (I.isSigned())
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ // icmp pred (op exact X, Z), (op exact Y, Z) -> icmp pred X, Y.
+ // Both operations must be exact: if only one is, the other may have
+ // discarded bits, so equal results do not imply X == Y.
+ if (!BO0->isExact() || !BO1->isExact())
+ break;
+ // A negative sdiv divisor reverses the ordering of the quotients, so
+ // only equality comparisons may look through sdiv.
+ if (BO0->getOpcode() == Instruction::SDiv && !I.isEquality())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ case Instruction::Shl: {
+ bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+ bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && I.isSigned())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
}
}
}
@@ -2425,9 +2483,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
- if (Op0->hasOneUse() && Op1->hasOneUse() &&
- match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_And(m_Value(C), m_Value(D)))) {
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
Value *X = 0, *Y = 0, *Z = 0;
if (A == C) {
@@ -2448,6 +2505,32 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return &I;
}
}
+
+ // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+ // "icmp (and X, mask), cst"
+ uint64_t ShAmt = 0;
+ ConstantInt *Cst1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
+ m_ConstantInt(ShAmt))))) &&
+ match(Op1, m_ConstantInt(Cst1)) &&
+ // Only do this when A has multiple uses. This is most important to do
+ // when it exposes other optimizations.
+ !A->hasOneUse()) {
+ unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+
+ if (ShAmt < ASize) {
+ APInt MaskV =
+ APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+ MaskV <<= ShAmt;
+
+ APInt CmpV = Cst1->getValue().zext(ASize);
+ CmpV <<= ShAmt;
+
+ Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+ return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ }
+ }
}
{
@@ -2704,6 +2787,42 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
+ case Instruction::FPExt: {
+ // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
+ FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
+ ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
+ if (!RHSF)
+ break;
+
+ // We can't convert a PPC double double.
+ if (RHSF->getType()->isPPC_FP128Ty())
+ break;
+
+ const fltSemantics *Sem;
+ // FIXME: This shouldn't be here.
+ if (LHSExt->getSrcTy()->isFloatTy())
+ Sem = &APFloat::IEEEsingle;
+ else if (LHSExt->getSrcTy()->isDoubleTy())
+ Sem = &APFloat::IEEEdouble;
+ else if (LHSExt->getSrcTy()->isFP128Ty())
+ Sem = &APFloat::IEEEquad;
+ else if (LHSExt->getSrcTy()->isX86_FP80Ty())
+ Sem = &APFloat::x87DoubleExtended;
+ else
+ break;
+
+ bool Lossy;
+ APFloat F = RHSF->getValueAPF();
+ F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals.
+ if (!Lossy &&
+ F.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan)
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ ConstantFP::get(RHSC->getContext(), F));
+ break;
+ }
case Instruction::PHI:
// Only fold fcmp into the PHI if the phi and fcmp are in the same
// block. If in the same block, we're encouraging jump threading. If
@@ -2742,6 +2861,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
break;
}
+ case Instruction::FSub: {
+ // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
+ Value *Op;
+ if (match(LHSI, m_FNeg(m_Value(Op))))
+ return new FCmpInst(I.getSwappedPredicate(), Op,
+ ConstantExpr::getFNeg(RHSC));
+ break;
+ }
case Instruction::Load:
if (GetElementPtrInst *GEP =
dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
@@ -2755,5 +2882,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
+ // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y);
+
+ // fcmp (fpext x), (fpext y) -> fcmp x, y
+ if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
+ if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
+ if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ RHSExt->getOperand(0));
+
return Changed ? &I : 0;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 78ff7346abe4..432adc9d046d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -364,34 +364,12 @@ static bool equivalentAddressValues(Value *A, Value *B) {
return false;
}
-// If this instruction has two uses, one of which is a llvm.dbg.declare,
-// return the llvm.dbg.declare.
-DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
- if (!V->hasNUses(2))
- return 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
- return DI;
- if (isa<BitCastInst>(U) && U->hasOneUse()) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin()))
- return DI;
- }
- }
- return 0;
-}
-
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
- // If the RHS is an alloca with a two uses, the other one being a
- // llvm.dbg.declare, zapify the store and the declare, making the
- // alloca dead. We must do this to prevent declares from affecting
- // codegen.
if (!SI.isVolatile()) {
if (Ptr->hasOneUse()) {
if (isa<AllocaInst>(Ptr))
@@ -400,17 +378,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
if (GEP->getOperand(0)->hasOneUse())
return EraseInstFromFunction(SI);
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
- EraseInstFromFunction(*DI);
- return EraseInstFromFunction(SI);
- }
}
}
}
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
- EraseInstFromFunction(*DI);
- return EraseInstFromFunction(SI);
- }
}
// Attempt to improve the alignment.
@@ -621,8 +591,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
- PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
- PN->reserveOperandSpace(2);
+ PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
PN->addIncoming(SI.getOperand(0), SI.getParent());
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index d1a1fd6ddfac..57fb08aca266 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -320,6 +320,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
+ // See if we can fold away this div instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
// (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
Value *X = 0, *Z = 0;
if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
@@ -332,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return 0;
}
+/// dyn_castZExtVal - If V is a zext whose source type is exactly Ty, return the
+/// value being extended.  If V is a ConstantInt whose active bits fit within
+/// Ty's bit width, return that constant truncated to Ty.  Otherwise return
+/// null.
+static Value *dyn_castZExtVal(Value *V, const Type *Ty) {
+  // A zext is only usable when it widens from precisely the requested type.
+  if (ZExtInst *Ext = dyn_cast<ZExtInst>(V))
+    return Ext->getSrcTy() == Ty ? Ext->getOperand(0) : 0;
+
+  // A constant can be narrowed when none of its set bits would be dropped.
+  ConstantInt *CI = dyn_cast<ConstantInt>(V);
+  if (!CI)
+    return 0;
+  unsigned TyBits = cast<IntegerType>(Ty)->getBitWidth();
+  if (CI->getValue().getActiveBits() > TyBits)
+    return 0;
+  return ConstantExpr::getTrunc(CI, Ty);
+}
+
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -390,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return SelectInst::Create(Cond, TSI, FSI);
}
}
+
+ // (zext A) udiv (zext B) --> zext (A udiv B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
+ I.isExact()),
+ I.getType());
+
return 0;
}
@@ -452,27 +477,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
- return 0;
-}
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ const APFloat &Op1F = Op1C->getValueAPF();
-/// This function implements the transforms on rem instructions that work
-/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
-/// is used by the visitors to those instructions.
-/// @brief Transforms common to all three rem instructions
-Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (isa<UndefValue>(Op0)) { // undef % X -> 0
- if (I.getType()->isFPOrFPVectorTy())
- return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ // If the divisor has an exact multiplicative inverse we can turn the fdiv
+ // into a cheaper fmul.
+ APFloat Reciprocal(Op1F.getSemantics());
+ if (Op1F.getExactInverse(&Reciprocal)) {
+ ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Op0, RFP);
+ }
}
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
-
- // Handle cases involving: rem X, (select Cond, Y, Z)
- if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
- return &I;
return 0;
}
@@ -484,26 +499,11 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Instruction *common = commonRemTransforms(I))
- return common;
-
- // X % X == 0
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // 0 % X == 0 for integer, we don't need to preserve faults!
- if (Constant *LHS = dyn_cast<Constant>(Op0))
- if (LHS->isNullValue())
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X % 0 == undef, we don't need to preserve faults!
- if (RHS->equalsInt(0))
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
- if (RHS->equalsInt(1)) // X % 1 == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -525,6 +525,9 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Instruction *InstCombiner::visitURem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyURemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
if (Instruction *common = commonIRemTransforms(I))
return common;
@@ -552,13 +555,22 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
return SelectInst::Create(Cond, TrueAnd, FalseAnd);
}
}
-
+
+ // (zext A) urem (zext B) --> zext (A urem B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ I.getType());
+
return 0;
}
Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifySRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
@@ -617,6 +629,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
- return commonRemTransforms(I);
-}
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 297a18c40a97..abf61bbaf3a6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -80,18 +80,16 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
if (LHSVal == 0) {
- NewLHS = PHINode::Create(LHSType,
+ NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(0)->getName() + ".pn");
- NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
InsertNewInstBefore(NewLHS, PN);
LHSVal = NewLHS;
}
if (RHSVal == 0) {
- NewRHS = PHINode::Create(RHSType,
+ NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
- NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
InsertNewInstBefore(NewRHS, PN);
RHSVal = NewRHS;
@@ -202,11 +200,10 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
if (FixedOperands[i]) continue; // operand doesn't need a phi.
Value *FirstOp = FirstInst->getOperand(i);
- PHINode *NewPN = PHINode::Create(FirstOp->getType(),
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
FirstOp->getName()+".pn");
InsertNewInstBefore(NewPN, PN);
- NewPN->reserveOperandSpace(e);
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
OperandPhis[i] = NewPN;
FixedOperands[i] = NewPN;
@@ -240,7 +237,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// obvious the value of the load is not changed from the point of the load to
/// the end of the block it is in.
///
-/// Finally, it is safe, but not profitable, to sink a load targetting a
+/// Finally, it is safe, but not profitable, to sink a load targeting a
/// non-address-taken alloca. Doing so will cause us to not promote the alloca
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
@@ -340,8 +337,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
PN.getName()+".in");
- NewPN->reserveOperandSpace(PN.getNumOperands()/2);
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
@@ -446,8 +443,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
PN.getName()+".in");
- NewPN->reserveOperandSpace(PN.getNumOperands()/2);
Value *InVal = FirstInst->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
@@ -699,7 +696,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
// Otherwise, Create the new PHI node for this user.
- EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
+ PN->getName()+".off"+Twine(Offset), PN);
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
@@ -776,9 +774,6 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- // If LCSSA is around, don't mess with Phi nodes
- if (MustPreserveLCSSA) return 0;
-
if (Value *V = SimplifyInstruction(&PN, TD))
return ReplaceInstUsesWith(PN, V);
@@ -826,18 +821,18 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// quick check to see if the PHI node only contains a single non-phi value, if
// so, scan to see if the phi cycle is actually equal to that value.
{
- unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+ unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
// Scan for the first non-phi operand.
- while (InValNo != NumOperandVals &&
+ while (InValNo != NumIncomingVals &&
isa<PHINode>(PN.getIncomingValue(InValNo)))
++InValNo;
- if (InValNo != NumOperandVals) {
- Value *NonPhiInVal = PN.getOperand(InValNo);
+ if (InValNo != NumIncomingVals) {
+ Value *NonPhiInVal = PN.getIncomingValue(InValNo);
// Scan the rest of the operands to see if there are any conflicts, if so
// there is no need to recursively scan other phis.
- for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+ for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
Value *OpVal = PN.getIncomingValue(InValNo);
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
break;
@@ -846,7 +841,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// If we scanned over all operands, then we have one unique value plus
// phi values. Scan PHI nodes to see if they all merge in each other or
// the value.
- if (InValNo == NumOperandVals) {
+ if (InValNo == NumIncomingVals) {
SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
return ReplaceInstUsesWith(PN, NonPhiInVal);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 97abc769ae5f..61a433a9c00c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -214,7 +214,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
unsigned OpToFold = 0;
if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
OpToFold = 1;
- } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
OpToFold = 2;
}
@@ -227,9 +227,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
InsertNewInstBefore(NewSel, SI);
NewSel->takeName(TVI);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
- return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
- llvm_unreachable("Unknown instruction!!");
+ BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
+ BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
+ FalseVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(TVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap());
+ }
+ return BO;
}
}
}
@@ -243,7 +250,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
unsigned OpToFold = 0;
if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
OpToFold = 1;
- } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
OpToFold = 2;
}
@@ -256,9 +263,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
InsertNewInstBefore(NewSel, SI);
NewSel->takeName(FVI);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
- return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
- llvm_unreachable("Unknown instruction!!");
+ BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
+ BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
+ TrueVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(FVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap());
+ }
+ return BO;
}
}
}
@@ -424,6 +438,19 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
return ReplaceInstUsesWith(SI, TrueVal);
/// NOTE: if we wanted to, this is where to detect integer MIN/MAX
}
+
+ if (isa<Constant>(CmpRHS)) {
+ if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
+ // Transform (X == C) ? X : Y -> (X == C) ? C : Y
+ SI.setOperand(1, CmpRHS);
+ Changed = true;
+ } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
+ // Transform (X != C) ? Y : X -> (X != C) ? Y : C
+ SI.setOperand(2, CmpRHS);
+ Changed = true;
+ }
+ }
+
return Changed ? &SI : 0;
}
@@ -503,9 +530,8 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
if (!IC || !IC->isEquality())
return 0;
- if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1)))
- if (!C->isZero())
- return 0;
+ if (!match(IC->getOperand(1), m_Zero()))
+ return 0;
ConstantInt *AndRHS;
Value *LHS = IC->getOperand(0);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index a7f800587bb6..811f94976f68 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -644,7 +644,14 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return &I;
}
}
-
+
+ // (C1 << A) << C2 -> (C1 << C2) << A
+ Constant *C1, *C2;
+ Value *A;
+ if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
+ match(I.getOperand(1), m_Constant(C2)))
+ return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+
return 0;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index bda8cea4e41f..6e727ce6e35c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -684,6 +684,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X % -1 demands all the bits because we don't want to introduce
+ // INT_MIN % -1 (== undef) by accident.
+ if (Rem->isAllOnesValue())
+ break;
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
if (DemandedMask.ult(RA)) // srem won't affect demanded bits
@@ -712,6 +716,18 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
}
}
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+ APInt Mask2 = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
break;
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 5caa12dfdfa5..ad6a8d054ee7 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
ConstantInt::get(Int32Ty,
SrcIdx, false));
}
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Canonicalize extractelement(cast) -> cast(extractelement)
+ // bitcasts can change the number of vector elements and they cost nothing
+ if (CI->hasOneUse() && EI.hasOneUse() &&
+ (CI->getOpcode() != Instruction::BitCast)) {
+ Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+ EI.getIndexOperand());
+ return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+ }
}
- // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
}
return 0;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
index 9100a851f16e..32009c39ec25 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -53,6 +53,7 @@ public:
void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
assert(Worklist.empty() && "Worklist must be empty to add initial group");
Worklist.reserve(NumEntries+16);
+ WorklistMap.resize(NumEntries);
DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
for (; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 37123d0621eb..7a84598c3a0d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -76,7 +76,6 @@ INITIALIZE_PASS(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreservedID(LCSSAID);
AU.setPreservesCFG();
}
@@ -600,8 +599,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
}
// Okay, we can do the transformation: create the new PHI node.
- PHINode *NewPN = PHINode::Create(I.getType(), "");
- NewPN->reserveOperandSpace(PN->getNumOperands()/2);
+ PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues(), "");
InsertNewInstBefore(NewPN, *PN);
NewPN->takeName(PN);
@@ -850,22 +848,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
Indices.end(), GEP.getName());
}
-
+
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- if (StrippedPtr != PtrOp) {
- const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ if (StrippedPtr != PtrOp &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
bool HasZeroPointerIndex = false;
if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
HasZeroPointerIndex = C->isZero();
-
+
// Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
// into : GEP [10 x i8]* X, i32 0, ...
//
// Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
// into : GEP i8* X, ...
- //
+ //
// This occurs when the program declares an array extern like "int X[];"
if (HasZeroPointerIndex) {
const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
@@ -976,7 +975,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
}
-
+
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
@@ -984,12 +983,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
// Determine how much the GEP moves the pointer. We are guaranteed to get
// a constant back from EmitGEPOffset.
ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
int64_t Offset = OffsetV->getSExtValue();
-
+
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
if (Offset == 0) {
@@ -1635,7 +1636,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
- MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
TD = getAnalysisIfAvailable<TargetData>();
@@ -1648,6 +1648,10 @@ bool InstCombiner::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ // Lower dbg.declare intrinsics otherwise their value may be clobbered
+ // by instcombiner.
+ EverMadeChange = LowerDbgDeclare(F);
+
// Iterate while there is work to do.
unsigned Iteration = 0;
while (DoOneIteration(F, Iteration++))
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
new file mode 100644
index 000000000000..2425342f7e6c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,638 @@
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to record the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+#include "ProfilingUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+#include <utility>
+using namespace llvm;
+
+namespace {
+  /// GCOVProfiler - Module pass for GCOV-style profiling.  EmitNotes and
+  /// EmitData select which of the two outputs (notes files, data-producing
+  /// instrumentation) the pass is asked to generate.
+  class GCOVProfiler : public ModulePass {
+    bool runOnModule(Module &M);
+  public:
+    static char ID;
+    /// Default construction requests both notes and data.  M and Ctx start
+    /// out null so they are never read as indeterminate pointers.
+    GCOVProfiler()
+      : ModulePass(ID), EmitNotes(true), EmitData(true), M(0), Ctx(0) {
+      initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+    }
+    /// At least one of EmitNotes and EmitData must be true (asserted below).
+    GCOVProfiler(bool EmitNotes, bool EmitData)
+      : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
+        M(0), Ctx(0) {
+      assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+      initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+    }
+    virtual const char *getPassName() const {
+      return "GCOV Profiler";
+    }
+
+  private:
+    // Create the GCNO files for the Module based on DebugInfo.
+    void emitGCNO(DebugInfoFinder &DIF);
+
+    // Modify the program to track transitions along edges and call into the
+    // profiling runtime to emit .gcda files when run.
+    bool emitProfileArcs(DebugInfoFinder &DIF);
+
+    // Get pointers to the functions in the runtime library.
+    Constant *getStartFileFunc();
+    Constant *getIncrementIndirectCounterFunc();
+    Constant *getEmitFunctionFunc();
+    Constant *getEmitArcsFunc();
+    Constant *getEndFileFunc();
+
+    // Create or retrieve an i32 state value that is used to represent the
+    // pred block number for certain non-trivial edges.
+    GlobalVariable *getEdgeStateValue();
+
+    // Produce a table of pointers to counters, by predecessor and successor
+    // block number.
+    GlobalVariable *buildEdgeLookupTable(Function *F,
+                                         GlobalVariable *Counter,
+                                         const UniqueVector<BasicBlock *> &Preds,
+                                         const UniqueVector<BasicBlock *> &Succs);
+
+    // Add the function to write out all our counters to the global destructor
+    // list.
+    void insertCounterWriteout(DebugInfoFinder &,
+                               SmallVector<std::pair<GlobalVariable *,
+                                                     uint32_t>, 8> &);
+
+    // Which outputs were requested at construction time.
+    bool EmitNotes;
+    bool EmitData;
+
+    // Null until assigned; the code that assigns them is outside this view.
+    Module *M;
+    LLVMContext *Ctx;
+  };
+}
+
+// Pass identification and registration under the "insert-gcov-profiling" name.
+char GCOVProfiler::ID = 0;
+INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
+                "Insert instrumentation for GCOV profiling", false, false)
+
+/// createGCOVProfilerPass - Allocate a new GCOVProfiler configured with the
+/// given EmitNotes / EmitData flags and hand it back as a ModulePass.
+ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData) {
+  GCOVProfiler *Profiler = new GCOVProfiler(EmitNotes, EmitData);
+  return Profiler;
+}
+
+/// findSubprogram - Starting at Scope, step outward through the contexts of
+/// enclosing lexical blocks until a subprogram scope is reached, and return
+/// it.  Any scope met on the way that is neither kind trips the assertion.
+static DISubprogram findSubprogram(DIScope Scope) {
+  for (;;) {
+    if (Scope.isSubprogram())
+      return DISubprogram(Scope);
+    assert(Scope.isLexicalBlock() &&
+           "Debug location not lexical block or subprogram");
+    Scope = DILexicalBlock(Scope).getContext();
+  }
+}
+
+namespace {
+ // Common base of the GCOV record writers: it holds the output stream and the
+ // helpers that serialize raw bytes, 32-bit words and strings to it.
+ class GCOVRecord {
+ protected:
+   static const char *LinesTag;
+   static const char *FunctionTag;
+   static const char *BlockTag;
+   static const char *EdgeTag;
+
+   // The stream starts out null; derived classes assign it in their
+   // constructors.
+   GCOVRecord() : os(0) {}
+
+   // Write Size raw bytes starting at Bytes to the output stream.
+   void writeBytes(const char *Bytes, int Size) {
+     os->write(Bytes, Size);
+   }
+
+   // Write the four bytes of i exactly as they are laid out in memory.
+   void write(uint32_t i) {
+     writeBytes(reinterpret_cast<char*>(&i), 4);
+   }
+
+   // Returns the length measured in 4-byte blocks that will be used to
+   // represent this string in a GCOV file
+   unsigned lengthOfGCOVString(StringRef s) {
+     // A GCOV string is a length, followed by the string bytes and a NUL, then
+     // between 0 and 3 NULs padding out to the next 4-byte word. The length is
+     // measured in 4-byte words including padding, not bytes of actual string.
+     //
+     // writeGCOVString emits s.size() bytes plus 4 - (s.size() % 4) NULs, i.e.
+     // exactly s.size() / 4 + 1 words. The earlier (s.size() + 5) / 4 reported
+     // one word too many whenever s.size() % 4 == 3.
+     return (s.size() / 4) + 1;
+   }
+
+   // Write s as a GCOV string: its length word, its bytes, then NUL padding.
+   void writeGCOVString(StringRef s) {
+     uint32_t Len = lengthOfGCOVString(s);
+     write(Len);
+     writeBytes(s.data(), s.size());
+
+     // Write 1 to 4 bytes of NUL padding.
+     assert((unsigned)(4 - (s.size() % 4)) > 0);
+     assert((unsigned)(4 - (s.size() % 4)) <= 4);
+     writeBytes("\0\0\0\0", 4 - (s.size() % 4));
+   }
+
+   raw_ostream *os;
+ };
+ // Four-byte tag constants, one per record kind.
+ const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
+ const char *GCOVRecord::FunctionTag = "\0\0\0\1";
+ const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
+ const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
+
+ class GCOVFunction;
+ class GCOVBlock;
+
+ // One filename together with the line numbers recorded for it within a
+ // block.  The constructor is private and GCOVBlock is a friend, so instances
+ // are obtained by asking a GCOVBlock for them.
+ class GCOVLines : public GCOVRecord {
+ public:
+   // Append one more line number to this file's list.
+   void addLine(uint32_t Line) { Lines.push_back(Line); }
+
+   // Number of 4-byte words this entry contributes to a record: the string
+   // payload, two further words, and one word per stored line number.
+   uint32_t length() {
+     const unsigned NameWords = lengthOfGCOVString(Filename);
+     return NameWords + 2 + Lines.size();
+   }
+
+ private:
+   friend class GCOVBlock;
+
+   std::string Filename;
+   SmallVector<uint32_t, 32> Lines;
+
+   // Remember the filename and share the caller's output stream.
+   GCOVLines(std::string Filename, raw_ostream *os)
+     : Filename(Filename) {
+     this->os = os;
+   }
+ };
+
+  // One basic block as GCOV sees it: a number that is unique within the
+  // function, the per-file line tables attributed to the block, and the list
+  // of blocks it has outgoing edges to.
+  class GCOVBlock : public GCOVRecord {
+  public:
+    // Returns the line table for Filename, creating it on first use.
+    GCOVLines &getFile(std::string Filename) {
+      GCOVLines *&Slot = LinesByFile[Filename];
+      if (Slot == 0)
+        Slot = new GCOVLines(Filename, os);
+      return *Slot;
+    }
+
+    // Records an outgoing edge from this block to Successor.
+    void addEdge(GCOVBlock &Successor) {
+      OutEdges.push_back(&Successor);
+    }
+
+    // Emits this block's lines record: tag, length, block number, then for
+    // every file a zero word, the filename and its line numbers, and finally
+    // two zero words.
+    void writeOut() {
+      typedef StringMap<GCOVLines *>::iterator FileIter;
+
+      // Three fixed words (block number and the two trailing zeros) plus
+      // whatever each file reports for itself.
+      uint32_t RecordLen = 3;
+      for (FileIter FI = LinesByFile.begin(), FE = LinesByFile.end();
+           FI != FE; ++FI)
+        RecordLen += FI->second->length();
+
+      writeBytes(LinesTag, 4);
+      write(RecordLen);
+      write(Number);
+      for (FileIter FI = LinesByFile.begin(), FE = LinesByFile.end();
+           FI != FE; ++FI) {
+        GCOVLines &File = *FI->second;
+        write(0);
+        writeGCOVString(File.Filename);
+        for (int Idx = 0, End = File.Lines.size(); Idx != End; ++Idx)
+          write(File.Lines[Idx]);
+      }
+      write(0);
+      write(0);
+    }
+
+    // The block owns the line tables it handed out.
+    ~GCOVBlock() {
+      DeleteContainerSeconds(LinesByFile);
+    }
+
+  private:
+    friend class GCOVFunction;
+
+    GCOVBlock(uint32_t BlockNumber, raw_ostream *Out)
+      : Number(BlockNumber) {
+      os = Out;
+    }
+
+    uint32_t Number;
+    StringMap<GCOVLines *> LinesByFile;
+    SmallVector<GCOVBlock *, 4> OutEdges;
+  };
+
+  // A GCOV function record: an identifier, a checksum (always written as
+  // zero), the function's blocks and the edges between them. This is the one
+  // GCOV object that users construct directly; it owns its blocks, and
+  // through them the line tables.
+  class GCOVFunction : public GCOVRecord {
+  public:
+    // Creates one numbered GCOVBlock per basic block of SP's function plus a
+    // trailing synthetic return block, and immediately writes the function
+    // header record to Out.
+    GCOVFunction(DISubprogram SP, raw_ostream *Out) {
+      os = Out;
+
+      Function *Fn = SP.getFunction();
+      uint32_t NextNumber = 0;
+      for (Function::iterator BB = Fn->begin(), End = Fn->end();
+           BB != End; ++BB)
+        Blocks[BB] = new GCOVBlock(NextNumber++, Out);
+      ReturnBlock = new GCOVBlock(NextNumber, Out);
+
+      writeBytes(FunctionTag, 4);
+
+      // Ident, checksum and line number take one word each; each of the two
+      // strings takes its length word plus its contents.
+      uint32_t HeaderLen = 3;
+      HeaderLen += 1 + lengthOfGCOVString(SP.getName());
+      HeaderLen += 1 + lengthOfGCOVString(SP.getFilename());
+      write(HeaderLen);
+
+      // The identifier is the metadata node's address narrowed to 32 bits.
+      MDNode *Node = (MDNode*)SP;
+      uint32_t Ident = reinterpret_cast<intptr_t>(Node);
+      write(Ident);
+      write(0); // checksum
+      writeGCOVString(SP.getName());
+      writeGCOVString(SP.getFilename());
+      write(SP.getLineNumber());
+    }
+
+    ~GCOVFunction() {
+      DeleteContainerSeconds(Blocks);
+      delete ReturnBlock;
+    }
+
+    // Returns the GCOV block created for BB.
+    GCOVBlock &getBlock(BasicBlock *BB) {
+      return *Blocks[BB];
+    }
+
+    // Returns the synthetic block that return instructions branch to.
+    GCOVBlock &getReturnBlock() {
+      return *ReturnBlock;
+    }
+
+    // Writes the blocks record, then one edge record per block that has
+    // outgoing edges, then the lines record of every block.
+    void writeOut() {
+      typedef DenseMap<BasicBlock *, GCOVBlock *>::iterator BlockIter;
+
+      // Emit count of blocks (the return block included), one flag word per
+      // block. We never set any block flags.
+      writeBytes(BlockTag, 4);
+      write(Blocks.size() + 1);
+      for (int Idx = 0, End = Blocks.size() + 1; Idx != End; ++Idx)
+        write(0);
+
+      // Emit edges between blocks.
+      for (BlockIter BI = Blocks.begin(), BE = Blocks.end(); BI != BE; ++BI) {
+        GCOVBlock &Src = *BI->second;
+        if (!Src.OutEdges.empty()) {
+          writeBytes(EdgeTag, 4);
+          write(Src.OutEdges.size() * 2 + 1);
+          write(Src.Number);
+          for (int Idx = 0, End = Src.OutEdges.size(); Idx != End; ++Idx) {
+            write(Src.OutEdges[Idx]->Number);
+            write(0); // no flags
+          }
+        }
+      }
+
+      // Emit lines for each block.
+      for (BlockIter BI = Blocks.begin(), BE = Blocks.end(); BI != BE; ++BI)
+        BI->second->writeOut();
+    }
+
+  private:
+    DenseMap<BasicBlock *, GCOVBlock *> Blocks;
+    GCOVBlock *ReturnBlock;
+  };
+}
+
+// Builds "<stem>.<NewStem>": takes the stem of OrigFilename as reported by
+// sys::path::stem and appends a dot followed by NewStem. Despite the
+// parameter name, NewStem acts as the new extension (callers pass "gcno" and
+// "gcda").
+// NOTE(review): sys::path::stem presumably yields only the last path
+// component, in which case any directory part is dropped -- confirm.
+static std::string replaceStem(std::string OrigFilename, std::string NewStem) {
+  std::string Result = sys::path::stem(OrigFilename).str();
+  Result += ".";
+  Result += NewStem;
+  return Result;
+}
+
+// Pass entry point. Caches the module and its context, collects the module's
+// debug info, then emits the notes files and/or inserts the arc counters
+// depending on EmitNotes / EmitData. The result is whatever emitProfileArcs
+// reports when EmitData is set, and false otherwise.
+bool GCOVProfiler::runOnModule(Module &M) {
+  this->M = &M;
+  Ctx = &M.getContext();
+
+  DebugInfoFinder Finder;
+  Finder.processModule(M);
+
+  if (EmitNotes)
+    emitGCNO(Finder);
+
+  bool Changed = false;
+  if (EmitData)
+    Changed = emitProfileArcs(Finder);
+  return Changed;
+}
+
+void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { // Opens one .gcno stream per compile unit, writes a function record for every subprogram that has an IR function, then terminates and closes every stream.
+ DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; // Output stream of each compile unit, keyed by the unit's metadata node.
+ for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
+ E = DIF.compile_unit_end(); I != E; ++I) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(*I);
+ raw_fd_ostream *&out = GcnoFiles[CU];
+ std::string ErrorInfo;
+ out = new raw_fd_ostream(replaceStem(CU.getFilename(), "gcno").c_str(),
+ ErrorInfo, raw_fd_ostream::F_Binary); // NOTE(review): ErrorInfo is never inspected, so a failed open goes unnoticed here.
+ out->write("oncg*404MVLL", 12); // 12-byte file header written verbatim; presumably the magic, version and stamp words -- confirm against the gcov format.
+ }
+
+ for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+ SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+ DISubprogram SP(*SPI);
+ raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()]; // NOTE(review): operator[] default-inserts a null stream if this compile unit was not opened above; the code below and the final loop would then dereference it -- confirm that cannot happen.
+
+ Function *F = SP.getFunction();
+ if (!F) continue; // No IR function attached to this subprogram, so nothing is emitted for it.
+ GCOVFunction Func(SP, os); // The constructor already writes the function header record to os.
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) { // One outgoing edge per successor of the terminator.
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) { // A return gets a single edge to the function's synthetic return block.
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0; // Line number of the last located instruction seen in this block; consecutive repeats are skipped.
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine(); // Updated before the subprogram check below, so a skipped location still counts as "last seen".
+ if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue; // Ignore locations whose enclosing subprogram is not SP.
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename()); // Lines are always filed under the subprogram's own filename.
+ Lines.addLine(Loc.getLine());
+ }
+ }
+ Func.writeOut(); // Emits the block, edge and line records for this function.
+ }
+
+ for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
+ I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
+ raw_fd_ostream *&out = I->second;
+ out->write("\0\0\0\0\0\0\0\0", 8); // EOF: eight NUL bytes terminate the file.
+ out->close();
+ delete out;
+ }
+}
+
+bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) { // Adds a 64-bit counter per CFG edge to every subprogram that has an IR function, plus the code that updates them, then registers the writeout routine. Returns false only when the module has no subprograms.
+ if (DIF.subprogram_begin() == DIF.subprogram_end())
+ return false;
+
+ SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> CountersByIdent; // (counter array, function ident) pairs, later handed to insertCounterWriteout.
+ for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+ SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+ DISubprogram SP(*SPI);
+ Function *F = SP.getFunction();
+ if (!F) continue;
+
+ unsigned Edges = 0; // Total edge count: a return counts as one edge, any other terminator contributes its successors.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ ++Edges;
+ else
+ Edges += TI->getNumSuccessors();
+ }
+
+ const ArrayType *CounterTy =
+ ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+ GlobalVariable *Counters =
+ new GlobalVariable(*M, CounterTy, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(CounterTy),
+ "__llvm_gcov_ctr", 0, false, 0); // Zero-initialized [Edges x i64] counter array for this function.
+ CountersByIdent.push_back(
+ std::make_pair(Counters, reinterpret_cast<intptr_t>((MDNode*)SP))); // Same ident expression GCOVFunction writes into the notes file: the metadata node address narrowed to 32 bits.
+
+ UniqueVector<BasicBlock *> ComplexEdgePreds; // Blocks whose terminator has several successors but is not a BranchInst.
+ UniqueVector<BasicBlock *> ComplexEdgeSuccs; // Every successor of those blocks.
+
+ unsigned Edge = 0; // Index of the first counter belonging to the current block.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors) {
+ IRBuilder<> Builder(TI);
+
+ if (Successors == 1) { // Single edge: load, increment and store counter number Edge.
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { // Branch with more than one successor: the condition selects counter Edge or Edge + 1.
+ Value *Sel = Builder.CreateSelect(
+ BI->getCondition(),
+ ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
+ ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+ SmallVector<Value *, 2> Idx;
+ Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Sel);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters,
+ Idx.begin(), Idx.end());
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else { // Any other multi-successor terminator is "complex" and is handled through the lookup table below.
+ ComplexEdgePreds.insert(BB);
+ for (int i = 0; i != Successors; ++i)
+ ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ }
+ Edge += Successors; // Step past this block's counters.
+ }
+ }
+
+ if (!ComplexEdgePreds.empty()) {
+ GlobalVariable *EdgeTable =
+ buildEdgeLookupTable(F, Counters,
+ ComplexEdgePreds, ComplexEdgeSuccs);
+ GlobalVariable *EdgeState = getEdgeStateValue();
+
+ const Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+ IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); // The vector is indexed from 1 here, while the stored predecessor number starts at 0.
+ Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ }
+ for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+ // call runtime to perform increment
+ IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI());
+ Value *CounterPtrArray =
+ Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
+ i * ComplexEdgePreds.size()); // Start of successor i's row in the table, which holds one slot per predecessor.
+ Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+ EdgeState, CounterPtrArray);
+ // clear the predecessor number
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ }
+ }
+ }
+
+ insertCounterWriteout(DIF, CountersByIdent);
+
+ return true;
+}
+
+// Builds the constant lookup table used for "complex" edges: edges leaving a
+// block whose terminator has more than one successor but is not a branch, so
+// that picking the counter to update needs more than a select.
+//
+// The table is a flat [(succs * preds) x i64*] array, logically
+// [succ x [pred x i64*]]. The slot for (Succ, Pred) holds the address of the
+// counter for the edge Pred -> Succ; slots without such an edge stay null.
+// Preds and Succs supply the (1-based) ids that index the table, and Counters
+// is the function's counter array. Returns the new internal global.
+GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
+    Function *F,
+    GlobalVariable *Counters,
+    const UniqueVector<BasicBlock *> &Preds,
+    const UniqueVector<BasicBlock *> &Succs) {
+  // TODO: support invoke, threads. We rely on the fact that nothing can modify
+  // the whole-Module pred edge# between the time we set it and the time we next
+  // read it. Threads and invoke make this untrue.
+
+  const unsigned TableSize = Succs.size() * Preds.size();
+
+  // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
+  const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+  const ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
+
+  // Scratch buffer for the table entries, released again before returning.
+  Constant **EdgeTable = new Constant*[TableSize];
+  Constant *NullValue = Constant::getNullValue(Int64PtrTy);
+  for (unsigned i = 0; i != TableSize; ++i)
+    EdgeTable[i] = NullValue;
+
+  // Walk the blocks in the same order, and with the same edge numbering, as
+  // emitProfileArcs so that Edge + i names the right counter.
+  unsigned Edge = 0;
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    TerminatorInst *TI = BB->getTerminator();
+    int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+    if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
+      for (int i = 0; i != Successors; ++i) {
+        BasicBlock *Succ = TI->getSuccessor(i);
+        IRBuilder<> builder(Succ);
+        Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+                                                            Edge + i);
+        EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
+                  (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+      }
+    }
+    Edge += Successors;
+  }
+
+  GlobalVariable *EdgeTableGV =
+      new GlobalVariable(
+          *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
+          ConstantArray::get(EdgeTableTy, &EdgeTable[0], TableSize),
+          "__llvm_gcda_edge_table");
+  EdgeTableGV->setUnnamedAddr(true);
+
+  // The constant array keeps its own operand list, so the scratch buffer is
+  // no longer needed. It was previously leaked on every call.
+  delete [] EdgeTable;
+  return EdgeTableGV;
+}
+
+// Returns the module's "llvm_gcda_start_file" function, requested with the
+// type void (i8 *).
+Constant *GCOVProfiler::getStartFileFunc() {
+  const Type *ParamTys[] = { Type::getInt8PtrTy(*Ctx) };
+  const Type *VoidTy = Type::getVoidTy(*Ctx);
+  const FunctionType *StartFileTy = FunctionType::get(VoidTy, ParamTys, false);
+  return M->getOrInsertFunction("llvm_gcda_start_file", StartFileTy);
+}
+
+// Returns the module's "llvm_gcda_increment_indirect_counter" function,
+// requested with the type void (i32 *, i64 **).
+Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
+  const Type *ParamTys[] = {
+    Type::getInt32PtrTy(*Ctx),                  // uint32_t *predecessor
+    Type::getInt64PtrTy(*Ctx)->getPointerTo(),  // uint64_t **state_table_row
+  };
+  const Type *VoidTy = Type::getVoidTy(*Ctx);
+  const FunctionType *IncrementTy =
+      FunctionType::get(VoidTy, ParamTys, false);
+  return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter",
+                                IncrementTy);
+}
+
+// Returns the module's "llvm_gcda_emit_function" function, requested with the
+// type void (i32).
+Constant *GCOVProfiler::getEmitFunctionFunc() {
+  const Type *ParamTys[] = { Type::getInt32Ty(*Ctx) };
+  const Type *VoidTy = Type::getVoidTy(*Ctx);
+  const FunctionType *EmitFunctionTy =
+      FunctionType::get(VoidTy, ParamTys, false);
+  return M->getOrInsertFunction("llvm_gcda_emit_function", EmitFunctionTy);
+}
+
+// Returns the module's "llvm_gcda_emit_arcs" function, requested with the
+// type void (i32, i64 *).
+Constant *GCOVProfiler::getEmitArcsFunc() {
+  const Type *ParamTys[] = {
+    Type::getInt32Ty(*Ctx),     // uint32_t num_counters
+    Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
+  };
+  const Type *VoidTy = Type::getVoidTy(*Ctx);
+  const FunctionType *EmitArcsTy = FunctionType::get(VoidTy, ParamTys, false);
+  return M->getOrInsertFunction("llvm_gcda_emit_arcs", EmitArcsTy);
+}
+
+// Returns the module's "llvm_gcda_end_file" function, requested with the type
+// void ().
+Constant *GCOVProfiler::getEndFileFunc() {
+  const Type *VoidTy = Type::getVoidTy(*Ctx);
+  const FunctionType *EndFileTy = FunctionType::get(VoidTy, false);
+  return M->getOrInsertFunction("llvm_gcda_end_file", EndFileTy);
+}
+
+// Returns the module-wide i32 global that records which complex-edge
+// predecessor ran last. It is created on first request as an internal,
+// unnamed_addr variable initialized to 0xffffffff, and reused afterwards.
+GlobalVariable *GCOVProfiler::getEdgeStateValue() {
+  if (GlobalVariable *Existing =
+          M->getGlobalVariable("__llvm_gcov_global_state_pred"))
+    return Existing;
+
+  const Type *Int32Ty = Type::getInt32Ty(*Ctx);
+  Constant *NoPredecessor = ConstantInt::get(Int32Ty, 0xffffffff);
+  GlobalVariable *State =
+      new GlobalVariable(*M, Int32Ty, false, GlobalValue::InternalLinkage,
+                         NoPredecessor, "__llvm_gcov_global_state_pred");
+  State->setUnnamedAddr(true);
+  return State;
+}
+
+void GCOVProfiler::insertCounterWriteout( // Builds the internal function "__llvm_gcov_writeout", which passes the counter arrays to the llvm_gcda_* runtime hooks, and hands it to InsertProfilingShutdownCall.
+ DebugInfoFinder &DIF,
+ SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> &CountersByIdent) { // Each entry pairs a function's counter array with that function's ident.
+ const FunctionType *WriteoutFTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *WriteoutF = Function::Create(WriteoutFTy,
+ GlobalValue::InternalLinkage,
+ "__llvm_gcov_writeout", M);
+ WriteoutF->setUnnamedAddr(true);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF); // The whole body is emitted into this single block.
+ IRBuilder<> Builder(BB);
+
+ Constant *StartFile = getStartFileFunc();
+ Constant *EmitFunction = getEmitFunctionFunc();
+ Constant *EmitArcs = getEmitArcsFunc();
+ Constant *EndFile = getEndFileFunc();
+
+ for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
+ CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
+ DICompileUnit compile_unit(*CUI);
+ std::string FilenameGcda = replaceStem(compile_unit.getFilename(), "gcda"); // Same naming scheme as the .gcno file, with the "gcda" extension.
+ Builder.CreateCall(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda));
+ for (SmallVector<std::pair<GlobalVariable *, uint32_t>, 8>::iterator
+ I = CountersByIdent.begin(), E = CountersByIdent.end();
+ I != E; ++I) { // NOTE(review): the list is not filtered by compile unit, so every unit's file receives every function's counters -- confirm this is intended.
+ Builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx),
+ I->second));
+ GlobalVariable *GV = I->first;
+ unsigned Arcs =
+ cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); // Counter count, read back from the array type of the global.
+ Builder.CreateCall2(EmitArcs,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0)); // Pointer to the first counter of the array.
+ }
+ Builder.CreateCall(EndFile);
+ }
+ Builder.CreateRetVoid();
+
+ InsertProfilingShutdownCall(WriteoutF, M); // Adds the writeout function to llvm.global_dtors.
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 96ed4fa5c0fe..71adc1ec6de0 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -23,6 +23,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeEdgeProfilerPass(Registry);
initializeOptimalEdgeProfilerPass(Registry);
initializePathProfilerPass(Registry);
+ initializeGCOVProfilerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 829da6b295de..f76c77e1bdbf 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This module privides means for calculating a maximum spanning tree for a
+// This module provides means for calculating a maximum spanning tree for a
// given set of weighted edges. The type parameter T is the type of a node.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index c85a1a9391d4..e09f882aa323 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "insert-optimal-edge-profiling"
#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
@@ -26,7 +27,6 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "MaximumSpanningTree.h"
-#include <set>
using namespace llvm;
STATISTIC(NumEdgesInserted, "The # of edges inserted.");
@@ -120,14 +120,14 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
NumEdgesInserted = 0;
std::vector<Constant*> Initializer(NumEdges);
- Constant* Zero = ConstantInt::get(Int32, 0);
- Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+ Constant *Zero = ConstantInt::get(Int32, 0);
+ Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
// Instrument all of the edges not in MST...
unsigned i = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
// Calculate a Maximum Spanning Tree with the edge weights determined by
// ProfileEstimator. ProfileEstimator also assign weights to the virtual
@@ -139,17 +139,17 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
ProfileInfo::EdgeWeights ECs =
getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
- MaximumSpanningTree<BasicBlock> MST (EdgeVector);
- std::stable_sort(MST.begin(),MST.end());
+ MaximumSpanningTree<BasicBlock> MST(EdgeVector);
+ std::stable_sort(MST.begin(), MST.end());
// Check if (0,entry) not in the MST. If not, instrument edge
// (IncrementCounterInBlock()) and set the counter initially to zero, if
// the edge is in the MST the counter is initialised to -1.
BasicBlock *entry = &(F->getEntryBlock());
- ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge,entry,i);
+ printEdgeCounter(edge, entry, i);
IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
Initializer[i++] = (Zero);
} else{
@@ -170,9 +170,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
// has no successors, the virtual edge (BB,0) is processed.
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge,BB,i);
+ printEdgeCounter(edge, BB, i);
IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
Initializer[i++] = (Zero);
} else{
@@ -195,11 +195,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
// otherwise insert it in the successor block.
if (TI->getNumSuccessors() == 1) {
// Insert counter at the start of the block
- printEdgeCounter(edge,BB,i);
+ printEdgeCounter(edge, BB, i);
IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
} else {
// Insert counter at the start of the block
- printEdgeCounter(edge,Succ,i);
+ printEdgeCounter(edge, Succ, i);
IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
}
Initializer[i++] = (Zero);
@@ -212,9 +212,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
// Check if the number of edges counted at first was the number of edges we
// considered for instrumentation.
- assert(i==NumEdges && "the number of edges in counting array is wrong");
+ assert(i == NumEdges && "the number of edges in counting array is wrong");
- // Assing the now completely defined initialiser to the array.
+ // Assign the now completely defined initialiser to the array.
Constant *init = ConstantArray::get(ATy, Initializer);
Counters->setInitializer(init);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
index 6449b39cfc9d..6b3f12dcbc84 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -63,7 +63,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Instrumentation.h"
-#include <map>
#include <vector>
#define HASH_THRESHHOLD 100000
@@ -259,7 +258,7 @@ private:
};
// ---------------------------------------------------------------------------
-// PathProfiler is a module pass which intruments path profiling instructions
+// PathProfiler is a module pass which instruments path profiling instructions
// ---------------------------------------------------------------------------
class PathProfiler : public ModulePass {
private:
@@ -389,6 +388,9 @@ namespace llvm {
// BallLarusEdge << operator overloading
raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge)
+ LLVM_ATTRIBUTE_USED;
+ raw_ostream& operator<<(raw_ostream& os,
const BLInstrumentationEdge& edge) {
os << "[" << edge.getSource()->getName() << " -> "
<< edge.getTarget()->getName() << "] init: "
@@ -929,14 +931,16 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
void PathProfiler::preparePHI(BLInstrumentationNode* node) {
BasicBlock* block = node->getBlock();
BasicBlock::iterator insertPoint = block->getFirstNonPHI();
- PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber",
+ pred_iterator PB = pred_begin(node->getBlock()),
+ PE = pred_end(node->getBlock());
+ PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
+ std::distance(PB, PE), "pathNumber",
insertPoint );
node->setPathPHI(phi);
node->setStartingPathNumber(phi);
node->setEndingPathNumber(phi);
- for(pred_iterator predIt = pred_begin(node->getBlock()),
- end = pred_end(node->getBlock()); predIt != end; predIt++) {
+ for(pred_iterator predIt = PB; predIt != PE; predIt++) {
BasicBlock* pred = (*predIt);
if(pred != NULL)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index b57bbf60a07a..7435bc37fbe1 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -110,7 +110,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
GlobalValue *CounterArray, bool beginning) {
// Insert the increment after any alloca or PHI instructions...
BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
- BB->getTerminator();
+ BB->getTerminator();
while (isa<AllocaInst>(InsertPos))
++InsertPos;
@@ -121,8 +121,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
- Indices.size());
+ ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
@@ -131,3 +130,41 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
"NewFuncCounter", InsertPos);
new StoreInst(NewVal, ElementPtr, InsertPos);
}
+
+// Appends Callee to Mod's llvm.global_dtors list with priority 65535. Any
+// existing llvm.global_dtors variable is replaced by a new one whose
+// initializer holds the previous entries followed by the new entry.
+void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
+  // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
+  // types.
+  const Type *GlobalDtorElems[2] = {
+    Type::getInt32Ty(Mod->getContext()),
+    FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
+  };
+  const StructType *GlobalDtorElemTy =
+      StructType::get(Mod->getContext(), GlobalDtorElems, false);
+
+  // Construct the new element we'll be adding.
+  Constant *Elem[2] = {
+    ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
+    ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
+  };
+
+  // If llvm.global_dtors exists, make a copy of the things in its list and
+  // delete it, to replace it with one that has a larger array type.
+  std::vector<Constant *> dtors;
+  if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
+    if (ConstantArray *InitList =
+        dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
+      for (unsigned i = 0, e = InitList->getType()->getNumElements();
+           i != e; ++i)
+        dtors.push_back(cast<Constant>(InitList->getOperand(i)));
+    }
+    GlobalDtors->eraseFromParent();
+  }
+  dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false));
+
+  // Build up llvm.global_dtors with our new item in it. The array type must
+  // have exactly as many elements as the initializer: sizing it as a fixed
+  // one-element array breaks as soon as the module already had destructors.
+  const ArrayType *GlobalDtorsTy =
+      ArrayType::get(GlobalDtorElemTy, dtors.size());
+  GlobalVariable *GlobalDtors = new GlobalVariable(
+      *Mod, GlobalDtorsTy, false,
+      GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
+  GlobalDtors->setInitializer(ConstantArray::get(GlobalDtorsTy, dtors));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
index a76e3576e1ca..09b22171ff04 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -18,9 +18,10 @@
#define PROFILINGUTILS_H
namespace llvm {
+ class BasicBlock;
class Function;
class GlobalValue;
- class BasicBlock;
+ class Module;
class PointerType;
void InsertProfilingInitCall(Function *MainFn, const char *FnName,
@@ -29,6 +30,7 @@ namespace llvm {
void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
GlobalValue *CounterArray,
bool beginning = true);
+ void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
}
#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 9536939ba2d4..018439018553 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -47,21 +47,21 @@ using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
"sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
"of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
-static cl::opt<bool>
-CriticalEdgeSplit("cgp-critical-edge-splitting",
- cl::desc("Split critical edges during codegen prepare"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
namespace {
class CodeGenPrepare : public FunctionPass {
@@ -76,15 +76,15 @@ namespace {
/// update it.
BasicBlock::iterator CurInstIterator;
- /// BackEdges - Keep a set of all the loop back edges.
- ///
- SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
-
- // Keeps track of non-local addresses that have been sunk into a block. This
- // allows us to avoid inserting duplicate code for blocks with multiple
- // load/stores of the same address.
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address.
DenseMap<Value*, Value*> SunkAddrs;
+ /// ModifiedDT - If CFG is modified in any way, dominator tree may need to
+ /// be updated.
+ bool ModifiedDT;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -98,10 +98,6 @@ namespace {
AU.addPreserved<ProfileInfo>();
}
- virtual void releaseMemory() {
- BackEdges.clear();
- }
-
private:
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -113,7 +109,7 @@ namespace {
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
- void findLoopBackEdges(const Function &F);
+ bool DupRetToEnableTailCallOpts(ReturnInst *RI);
};
}
@@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
}
-/// findLoopBackEdges - Do a DFS walk to find loop back edges.
-///
-void CodeGenPrepare::findLoopBackEdges(const Function &F) {
- SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
- FindFunctionBackedges(F, Edges);
-
- BackEdges.insert(Edges.begin(), Edges.end());
-}
-
-
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ ModifiedDT = false;
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
+
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
- // Now find loop back edges, but only if they are being used to decide which
- // critical edges to split.
- if (CriticalEdgeSplit)
- findLoopBackEdges(F);
-
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
MadeChange |= OptimizeBlock(*BB);
+ }
EverMadeChange |= MadeChange;
}
SunkAddrs.clear();
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ MadeChange |= ConstantFoldTerminator(BB);
+
+ if (MadeChange)
+ ModifiedDT = true;
+ EverMadeChange |= MadeChange;
+ }
+
+ if (ModifiedDT && DT)
+ DT->DT->recalculate(F);
+
return EverMadeChange;
}
@@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT) {
+ if (DT && !ModifiedDT) {
BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
@@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
-/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
-/// lives in to see if there is a block that we can reuse as a critical edge
-/// from TIBB.
-static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
- BasicBlock *Dest = DestPHI->getParent();
-
- /// TIPHIValues - This array is lazily computed to determine the values of
- /// PHIs in Dest that TI would provide.
- SmallVector<Value*, 32> TIPHIValues;
-
- /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
- unsigned TIBBEntryNo = 0;
-
- // Check to see if Dest has any blocks that can be used as a split edge for
- // this terminator.
- for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
- BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
- // To be usable, the pred has to end with an uncond branch to the dest.
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!PredBr || !PredBr->isUnconditional())
- continue;
- // Must be empty other than the branch and debug info.
- BasicBlock::iterator I = Pred->begin();
- while (isa<DbgInfoIntrinsic>(I))
- I++;
- if (&*I != PredBr)
- continue;
- // Cannot be the entry block; its label does not get emitted.
- if (Pred == &Dest->getParent()->getEntryBlock())
- continue;
-
- // Finally, since we know that Dest has phi nodes in it, we have to make
- // sure that jumping to Pred will have the same effect as going to Dest in
- // terms of PHI values.
- PHINode *PN;
- unsigned PHINo = 0;
- unsigned PredEntryNo = pi;
-
- bool FoundMatch = true;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
- if (PHINo == TIPHIValues.size()) {
- if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
- TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
- TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
- }
-
- // If the PHI entry doesn't work, we can't use this pred.
- if (PN->getIncomingBlock(PredEntryNo) != Pred)
- PredEntryNo = PN->getBasicBlockIndex(Pred);
-
- if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
- FoundMatch = false;
- break;
- }
- }
-
- // If we found a workable predecessor, change TI to branch to Succ.
- if (FoundMatch)
- return Pred;
- }
- return 0;
-}
-
-
-/// SplitEdgeNicely - Split the critical edge from TI to its specified
-/// successor if it will improve codegen. We only do this if the successor has
-/// phi nodes (otherwise critical edges are ok). If there is already another
-/// predecessor of the succ that is empty (and thus has no phi nodes), use it
-/// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
- SmallSet<std::pair<const BasicBlock*,
- const BasicBlock*>, 8> &BackEdges,
- Pass *P) {
- BasicBlock *TIBB = TI->getParent();
- BasicBlock *Dest = TI->getSuccessor(SuccNum);
- assert(isa<PHINode>(Dest->begin()) &&
- "This should only be called if Dest has a PHI!");
- PHINode *DestPHI = cast<PHINode>(Dest->begin());
-
- // Do not split edges to EH landing pads.
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
- if (Invoke->getSuccessor(1) == Dest)
- return;
-
- // As a hack, never split backedges of loops. Even though the copy for any
- // PHIs inserted on the backedge would be dead for exits from the loop, we
- // assume that the cost of *splitting* the backedge would be too high.
- if (BackEdges.count(std::make_pair(TIBB, Dest)))
- return;
-
- if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
- ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PFI)
- PFI->splitEdge(TIBB, Dest, ReuseBB);
- Dest->removePredecessor(TIBB);
- TI->setSuccessor(SuccNum, ReuseBB);
- return;
- }
-
- SplitCriticalEdge(TI, SuccNum, P, true);
-}
-
-
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
@@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return Simplifier.fold(CI, TD);
}
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions into predecessors to enable tail call optimizations. The
+/// PHI case is shown below; a return with no value is handled as well:
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+/// Returns true if a return was duplicated; ModifiedDT is set in that case.
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+ if (!TLI)
+ return false;
+
+ Value *V = RI->getReturnValue();
+ PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
+ if (V && !PN)
+ return false;
+
+ BasicBlock *BB = RI->getParent();
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // With a PHI, only debug intrinsics may sit between the block's first
+ // instruction and the return; without one, only they may precede the return.
+ if (PN) {
+ BasicBlock::iterator BI = BB->begin();
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI != RI)
+ return false;
+ } else {
+ BasicBlock::iterator BI = BB->begin();
+ while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+ if (&*BI != RI)
+ return false;
+ }
+
+ // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ // call.
+ SmallVector<CallInst*, 4> TailCalls;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ } else {
+ SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (!VisitedBBs.insert(*PI))
+ continue; // predecessor already visited
+
+ BasicBlock::InstListType &InstList = (*PI)->getInstList();
+ BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+ BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+ do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+ if (RI == RE) // RI here is the local reverse iterator, not the ReturnInst
+ continue;
+
+ CallInst *CI = dyn_cast<CallInst>(&*RI);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of the
+ // return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ ModifiedDT = Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && pred_begin(BB) == pred_end(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
@@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
Value *Consensus = 0;
- unsigned NumUses = 0;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
while (!worklist.empty()) {
@@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
ExtAddrMode NewAddrMode =
AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
NewAddrModeInsts, *TLI);
-
- // Ensure that the obtained addressing mode is equivalent to that obtained
- // for all other roots of the PHI traversal. Also, when choosing one
- // such root as representative, select the one with the most uses in order
- // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
- if (!Consensus || NewAddrMode == AddrMode) {
- if (V->getNumUses() > NumUses) {
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
Consensus = V;
- NumUses = V->getNumUses();
- AddrMode = NewAddrMode;
+ NumUsesConsensus = NumUses;
AddrModeInsts = NewAddrModeInsts;
}
continue;
@@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ WeakVH IterHandle(CurInstIterator);
+ BasicBlock *BB = CurInstIterator->getParent();
+
RecursivelyDeleteTriviallyDeadInstructions(Repl);
- // This address is now available for reassignment, so erase the table entry;
- // we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
+
+ if (IterHandle != CurInstIterator) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ } else {
+ // This address is now available for reassignment, so erase the table
+ // entry; we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
}
++NumMemoryInsts;
return true;
@@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ return DupRetToEnableTailCallOpts(RI);
+
return false;
}
@@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
- bool MadeChange = false;
-
- // Split all critical edges where the dest block has a PHI.
- if (CriticalEdgeSplit) {
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
- }
- }
- }
-
SunkAddrs.clear();
+ bool MadeChange = false;
CurInstIterator = BB.begin();
for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index be12973b645f..e275268fc4ea 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index dbb68f3e0bd1..8dbcc23d7ec8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -23,7 +23,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include <set>
using namespace llvm;
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 867a06ad202d..53e46400dca8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- int64_t Off1 = 0, Off2 = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+ int64_t EarlierOff = 0, LaterOff = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return false;
-
- // Otherwise, we might have a situation like:
- // store i16 -> P + 1 Byte
- // store i32 -> P
- // In this case, we see if the later store completely overlaps all bytes
- // stored by the previous store.
- if (Off1 < Off2 || // Earlier starts before Later.
- Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
- return false;
- // Otherwise, we have complete overlap.
- return true;
+
+ // The later store completely overlaps the earlier store if:
+ //
+ // 1. Both start at the same offset and the later one's size is greater than
+ // or equal to the earlier one's, or
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // 2. The earlier store has an offset greater than the later offset, but which
+ // still lies completely within the later store.
+ //
+ // |--earlier--|
+ // |----- later ------|
+ //
+ // We have to be careful here as *Off is signed while *.Size is unsigned.
+ if (EarlierOff >= LaterOff &&
+ uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ return true;
+
+ // Otherwise, they don't completely overlap.
+ return false;
}
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
@@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// away the store and we bail out. However, if we depend on something
// that overwrites the memory location we *can* potentially optimize it.
//
- // Find out what memory location the dependant instruction stores.
+ // Find out what memory location the dependent instruction stores.
Instruction *DepWrite = InstDep.getInst();
AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if it isn't a size, bail out.
@@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (AA->doesNotAccessMemory(CS))
continue;
- unsigned NumModRef = 0, NumOther = 0;
-
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
- // If we detect that our AA is imprecise, it's not worth it to scan the
- // rest of the DeadPointers set. Just assume that the AA will return
- // ModRef for everything, and go ahead and bail out.
- if (NumModRef >= 16 && NumOther == 0)
- return MadeChange;
-
// See if the call site touches it.
AliasAnalysis::ModRefResult A =
AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
- if (A == AliasAnalysis::ModRef)
- ++NumModRef;
- else
- ++NumOther;
-
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
LiveAllocas.push_back(*I);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
deleted file mode 100644
index 4c3d188a8afd..000000000000
--- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This function breaks GEPs with more than 2 non-zero operands into smaller
-// GEPs each with no more than 2 non-zero operands. This exposes redundancy
-// between GEPs with common initial operand sequences.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "split-geps"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
- class GEPSplitter : public FunctionPass {
- virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(ID) {
- initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char GEPSplitter::ID = 0;
-INITIALIZE_PASS(GEPSplitter, "split-geps",
- "split complex GEPs into simple GEPs", false, false)
-
-FunctionPass *llvm::createGEPSplitterPass() {
- return new GEPSplitter();
-}
-
-bool GEPSplitter::runOnFunction(Function &F) {
- bool Changed = false;
-
- // Visit each GEP instruction.
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; )
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) {
- unsigned NumOps = GEP->getNumOperands();
- // Ignore GEPs which are already simple.
- if (NumOps <= 2)
- continue;
- bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) &&
- cast<ConstantInt>(GEP->getOperand(1))->isZero();
- if (NumOps == 3 && FirstIndexIsZero)
- continue;
- // The first index is special and gets expanded with a 2-operand GEP
- // (unless it's zero, in which case we can skip this).
- Value *NewGEP = FirstIndexIsZero ?
- GEP->getOperand(0) :
- GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1),
- "tmp", GEP);
- // All remaining indices get expanded with a 3-operand GEP with zero
- // as the second operand.
- Value *Idxs[2];
- Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);
- for (unsigned i = 2; i != NumOps; ++i) {
- Idxs[1] = GEP->getOperand(i);
- NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP);
- }
- GEP->replaceAllUsesWith(NewGEP);
- GEP->eraseFromParent();
- Changed = true;
- }
-
- return Changed;
-}
-
-void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index a0123f589816..efecb97de77d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
namespace {
struct Expression {
uint32_t opcode;
- const Type* type;
+ const Type *type;
SmallVector<uint32_t, 4> varargs;
- Expression() { }
- Expression(uint32_t o) : opcode(o) { }
+ Expression(uint32_t o = ~2U) : opcode(o) { }
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
- else if (opcode == ~0U || opcode == ~1U)
+ if (opcode == ~0U || opcode == ~1U)
return true;
- else if (type != other.type)
+ if (type != other.type)
return false;
- else if (varargs != other.varargs)
+ if (varargs != other.varargs)
return false;
return true;
}
};
class ValueTable {
- private:
- DenseMap<Value*, uint32_t> valueNumbering;
- DenseMap<Expression, uint32_t> expressionNumbering;
- AliasAnalysis* AA;
- MemoryDependenceAnalysis* MD;
- DominatorTree* DT;
-
- uint32_t nextValueNumber;
-
- Expression create_expression(Instruction* I);
- uint32_t lookup_or_add_call(CallInst* C);
- public:
- ValueTable() : nextValueNumber(1) { }
- uint32_t lookup_or_add(Value *V);
- uint32_t lookup(Value *V) const;
- void add(Value *V, uint32_t num);
- void clear();
- void erase(Value *v);
- void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
- AliasAnalysis *getAliasAnalysis() const { return AA; }
- void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
- void setDomTree(DominatorTree* D) { DT = D; }
- uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
- void verifyRemoved(const Value *) const;
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ uint32_t nextValueNumber;
+
+ Expression create_expression(Instruction* I);
+ uint32_t lookup_or_add_call(CallInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ void add(Value *V, uint32_t num);
+ void clear();
+ void erase(Value *v);
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
};
}
@@ -364,14 +362,14 @@ uint32_t ValueTable::lookup(Value *V) const {
return VI->second;
}
-/// clear - Remove all entries from the ValueTable
+/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
nextValueNumber = 1;
}
-/// erase - Remove a value from the value numbering
+/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
@@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const {
namespace {
class GVN : public FunctionPass {
- bool runOnFunction(Function &F);
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) {
- initializeGVNPass(*PassRegistry::getPassRegistry());
- }
-
- private:
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const TargetData* TD;
-
+ const TargetData *TD;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -418,17 +407,39 @@ namespace {
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
+ SmallVector<Instruction*, 8> InstrsToErase;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVN(bool noloads = false)
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ /// markInstructionForDeletion - This removes the specified instruction from
+ /// our various maps and marks it for deletion.
+ void markInstructionForDeletion(Instruction *I) {
+ VN.erase(I);
+ InstrsToErase.push_back(I);
+ }
+
+ const TargetData *getTargetData() const { return TD; }
+ DominatorTree &getDominatorTree() const { return *DT; }
+ AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
+ MemoryDependenceAnalysis &getMemDep() const { return *MD; }
+ private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
/// its value number.
void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
- LeaderTableEntry& Curr = LeaderTable[N];
+ LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
Curr.Val = V;
Curr.BB = BB;
return;
}
- LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+ LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
Node->Val = V;
Node->BB = BB;
Node->Next = Curr.Next;
@@ -474,19 +485,17 @@ namespace {
AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
}
+
// Helper functions
// FIXME: eliminate or document these better
- bool processLoad(LoadInst* L,
- SmallVectorImpl<Instruction*> &toErase);
- bool processInstruction(Instruction *I,
- SmallVectorImpl<Instruction*> &toErase);
- bool processNonLocalLoad(LoadInst* L,
- SmallVectorImpl<Instruction*> &toErase);
+ bool processLoad(LoadInst *L);
+ bool processInstruction(Instruction *I);
+ bool processNonLocalLoad(LoadInst *L);
bool processBlock(BasicBlock *BB);
- void dump(DenseMap<uint32_t, Value*>& d);
+ void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
- bool performPRE(Function& F);
+ bool performPRE(Function &F);
Value *findLeader(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
@@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
+ // If this is already the right type, just return it.
const Type *StoredValTy = StoredVal->getType();
uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+ uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
- if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
- // Pointer to Pointer -> use bitcast.
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
- }
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->isPointerTy()) {
@@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StorePtr, StoreSize, TD);
}
+/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+ LoadInst *DepLI, const TargetData &TD){
+ // Cannot handle reading from a load of a first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ if (R != -1) return R;
+
+ // If we have a load/load clobber and DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ if (Size == 0) return -1;
+
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+}
+
+
+
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
@@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias. Check this case to see if there is
-/// anything more we can do before we give up.
+/// that the store provides bits used by the load but the pointers don't
+/// mustalias. Check this case to see if there is anything more we can do
+/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
const Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
@@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
+/// GetLoadValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load. This means
+/// that the load *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ GVN &gvn) {
+ const TargetData &TD = *gvn.getTargetData();
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+ // widen SrcVal out to a larger load.
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ if (Offset+LoadSize > SrcValSize) {
+ assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+ assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+ // If we have a load/load clobber and SrcVal can be widened to cover this
+ // load, then we should widen it to the next power of 2 size big enough!
+ unsigned NewLoadSize = Offset+LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ const Type *DestPTy =
+ IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+ DestPTy = PointerType::get(DestPTy,
+ cast<PointerType>(PtrVal->getType())->getAddressSpace());
+
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (TD.isBigEndian())
+ RV = Builder.CreateLShr(RV,
+ NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+ // but then all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(SrcVal);
+ SrcVal = NewLoad;
+ }
+
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
@@ -943,11 +1045,12 @@ struct AvailableValueInBlock {
BasicBlock *BB;
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
MemIntrin // A memory intrinsic which is loaded from.
};
/// V - The value that is live out of the block.
- PointerIntPair<Value *, 1, ValType> Val;
+ PointerIntPair<Value *, 2, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
@@ -972,37 +1075,69 @@ struct AvailableValueInBlock {
return Res;
}
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
MemIntrinsic *getMemIntrinValue() const {
- assert(!isSimpleValue() && "Wrong accessor");
+ assert(isMemIntrinValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(const Type *LoadTy,
- const TargetData *TD) const {
+ Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
+ const TargetData *TD = gvn.getTargetData();
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
} else {
+ const TargetData *TD = gvn.getTargetData();
+ assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
@@ -1010,21 +1145,20 @@ struct AvailableValueInBlock {
}
};
-}
+} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
- const TargetData *TD,
- const DominatorTree &DT,
- AliasAnalysis *AA) {
+ GVN &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
- DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
- return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+ gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+ LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
@@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (SSAUpdate.HasValueForBlock(BB))
continue;
- SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
}
// Perform PHI construction.
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (V->getType()->isPointerTy())
+ if (V->getType()->isPointerTy()) {
+ AliasAnalysis *AA = gvn.getAliasAnalysis();
+
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
@@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
AA->addEscapingUse(P->getOperandUse(2*ii));
}
+ }
return V;
}
@@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) {
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI,
- SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processNonLocalLoad(LoadInst *LI) {
// Find the non-local dependencies of the load.
SmallVector<NonLocalDepResult, 64> Deps;
AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
@@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
+ if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
+ Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
}
}
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the latter with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI != LI && Address && TD) {
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
+ LI->getPointerOperand(),
+ DepLI, *TD);
+
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+ Offset));
+ continue;
+ }
+ }
+ }
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
@@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
continue;
}
}
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
continue;
}
@@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
- VN.getAliasAnalysis());
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
- VN.erase(LI);
- toErase.push_back(LI);
+ markInstructionForDeletion(LI);
++NumGVNLoad;
return true;
}
@@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
// Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
- VN.getAliasAnalysis());
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
- VN.erase(LI);
- toErase.push_back(LI);
+ markInstructionForDeletion(LI);
++NumPRELoad;
return true;
}
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
@@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
- // If the value isn't available, don't do anything!
- if (Dep.isClobber()) {
+ // If we have a clobber and target data is around, see if this is a clobber
+ // that we can fix up through code synthesis.
+ if (Dep.isClobber() && TD) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// completely covers this load. This sort of thing can happen in bitfield
// access code.
Value *AvailVal = 0;
- if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
- if (TD) {
- int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
- L->getPointerOperand(),
- DepSI, *TD);
- if (Offset != -1)
- AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *TD);
- }
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
+ L->getType(), L, *TD);
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the latter with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI == L)
+ return false;
+
+ int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
+ L->getPointerOperand(),
+ DepLI, *TD);
+ if (Offset != -1)
+ AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
+ }
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- if (TD) {
- int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
- L->getPointerOperand(),
- DepMI, *TD);
- if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
- }
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
}
if (AvailVal) {
@@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(AvailVal);
if (AvailVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
-
+ }
+
+ // If the value isn't available, don't do anything!
+ if (Dep.isClobber()) {
DEBUG(
- // fast print dep, using operator<< on instruction would be too slow
+ // fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
WriteAsOperand(dbgs(), L);
Instruction *I = Dep.getInst();
@@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
- return processNonLocalLoad(L, toErase);
+ return processNonLocalLoad(L);
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
@@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(StoredVal);
if (StoredVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
@@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// (depending on its type).
if (DepLI->getType() != L->getType()) {
if (TD) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
+ L, *TD);
if (AvailableVal == 0)
return false;
@@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(AvailableVal);
if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
@@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// intervening stores, for example.
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
// If this load occurs either right after a lifetime begin,
// then the loaded value is undefined.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
@@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I,
- SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
@@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I,
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
- VN.erase(I);
- toErase.push_back(I);
+ markInstructionForDeletion(I);
return true;
}
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- bool Changed = processLoad(LI, toErase);
-
- if (!Changed) {
- unsigned Num = VN.lookup_or_add(LI);
- addToLeaderTable(Num, LI, LI->getParent());
- }
+ if (processLoad(LI))
+ return true;
- return Changed;
+ unsigned Num = VN.lookup_or_add(LI);
+ addToLeaderTable(Num, LI, LI->getParent());
+ return false;
}
// For conditions branches, we can perform simple conditional propagation on
@@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I,
}
// Remove it!
- VN.erase(I);
I->replaceAllUsesWith(repl);
if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
- toErase.push_back(I);
+ markInstructionForDeletion(I);
return true;
}
@@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) {
bool GVN::processBlock(BasicBlock *BB) {
- // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
- // incrementing BI before processing an instruction).
- SmallVector<Instruction*, 8> toErase;
+ // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
+ // (and incrementing BI before processing an instruction).
+ assert(InstrsToErase.empty() &&
+ "We expect InstrsToErase to be empty across iterations");
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- ChangedFunction |= processInstruction(BI, toErase);
- if (toErase.empty()) {
+ ChangedFunction |= processInstruction(BI);
+ if (InstrsToErase.empty()) {
++BI;
continue;
}
// If we need some instructions deleted, do it now.
- NumGVNInstr += toErase.size();
+ NumGVNInstr += InstrsToErase.size();
// Avoid iterator invalidation.
bool AtStart = BI == BB->begin();
if (!AtStart)
--BI;
- for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I) {
+ for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ E = InstrsToErase.end(); I != E; ++I) {
DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
}
- toErase.clear();
+ InstrsToErase.clear();
if (AtStart)
BI = BB->begin();
@@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
- PHINode* Phi = PHINode::Create(CurInst->getType(),
+ pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
+ PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
CurInst->getName() + ".pre-phi",
CurrentBlock->begin());
- for (pred_iterator PI = pred_begin(CurrentBlock),
- PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
Phi->addIncoming(predMap[P], P);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 0fb67982a3db..09d569a097dd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -73,6 +73,7 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
+ SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
@@ -98,6 +99,7 @@ namespace {
}
private:
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
void EliminateIVComparisons();
void EliminateIVRemainders();
@@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
+/// isValidRewrite - Return true if the SCEV expansion generated by the
+/// rewriter can replace the original value. SCEV guarantees that it
+/// produces the same value, but the way it is produced may be illegal IR.
+/// Ideally, this function will only be called for verification.
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
+ // If an SCEV expression subsumed multiple pointers, its expansion could
+ // reassociate the GEP changing the base pointer. This is illegal because the
+ // final address produced by a GEP chain must be inbounds relative to its
+ // underlying object. Otherwise basic alias analysis, among other things,
+ // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+ // producing an expression involving multiple pointers. Until then, we must
+ // bail out here.
+ //
+ // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // because it understands lcssa phis while SCEV does not.
+ Value *FromPtr = FromVal;
+ Value *ToPtr = ToVal;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ FromPtr = GEP->getPointerOperand();
+ }
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ ToPtr = GEP->getPointerOperand();
+ }
+ if (FromPtr != FromVal || ToPtr != ToVal) {
+ // Quickly check the common case
+ if (FromPtr == ToPtr)
+ return true;
+
+ // SCEV may have rewritten an expression that produces the GEP's pointer
+ // operand. That's ok as long as the pointer operand has the same base
+ // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // base of a recurrence. This handles the case in which SCEV expansion
+ // converts a pointer type recurrence into a nonrecurrent pointer base
+ // indexed by an integer recurrence.
+ const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+ const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+ if (FromBase == ToBase)
+ return true;
+
+ DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
+ << *FromBase << " != " << *ToBase << "\n");
+
+ return false;
+ }
+ return true;
+}
+
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
@@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// update the branch to use the new comparison; in the common case this
// will make old comparison dead.
BI->setCondition(Cond);
- RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+ DeadInsts.push_back(OrigCond);
++NumLFTR;
Changed = true;
@@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
- Changed = true;
- ++NumReplaced;
-
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
+ if (!isValidRewrite(Inst, ExitVal)) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+ Changed = true;
+ ++NumReplaced;
+
PN->setIncomingValue(i, ExitVal);
// If this instruction is dead now, delete it.
@@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
}
void IndVarSimplify::EliminateIVComparisons() {
- SmallVector<WeakVH, 16> DeadInsts;
-
// Look for ICmp users.
for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
IVStrideUse &UI = *I;
@@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() {
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
DeadInsts.push_back(ICmp);
}
-
- // Now that we're done iterating through lists, clean up any instructions
- // which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
void IndVarSimplify::EliminateIVRemainders() {
- SmallVector<WeakVH, 16> DeadInsts;
-
// Look for SRem and URem users.
for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
IVStrideUse &UI = *I;
@@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() {
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
DeadInsts.push_back(Rem);
}
-
- // Now that we're done iterating through lists, clean up any instructions
- // which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
+ DeadInsts.clear();
Changed = false;
// If there are any floating-point recurrences, attempt to
@@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
ExitingBlock, BI, Rewriter);
}
- // Rewrite IV-derived expressions. Clears the rewriter cache.
+ // Rewrite IV-derived expressions.
RewriteIVExpressions(L, Rewriter);
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
// The Rewriter may not be used from this point on.
// Loop-invariant instructions in the preheader that aren't used in the
@@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
if (!isSafe(*I, L, SE)) return false;
return true;
}
-
+
// A cast is safe if its operand is.
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
return isSafe(C->getOperand(), L, SE);
@@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
}
void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- SmallVector<WeakVH, 16> DeadInsts;
-
// Rewrite all induction variable expressions in terms of the canonical
// induction variable.
//
@@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// Now expand it into actual Instructions and patch it into place.
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
// Inform ScalarEvolution that this value is changing. The change doesn't
// affect its value, but it does potentially affect which use lists the
// value will be on after the replacement, which affects ScalarEvolution's
@@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
NewVal->takeName(Op);
User->replaceUsesOfWith(Op, NewVal);
UI->setOperandValToReplace(NewVal);
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
+
++NumRemoved;
Changed = true;
// The old value may be dead now.
DeadInsts.push_back(Op);
}
-
- // Clear the rewriter cache, because values that are in the rewriter's cache
- // can be deleted in the loop below, causing the AssertingVH in the cache to
- // trigger.
- Rewriter.clear();
- // Now that we're done iterating through lists, clean up any instructions
- // which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
/// If there's a single exit block, sink any loop-invariant values that
@@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
+
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
@@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (Compare == 0 || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->use_back()))
return;
-
+
BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
// We need to verify that the branch actually controls the iteration count
@@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
(L->contains(TheBr->getSuccessor(0)) &&
L->contains(TheBr->getSuccessor(1))))
return;
-
-
+
+
// If it isn't a comparison with an integer-as-fp (the exit value), we can't
// transform it.
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
@@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (ExitValueVal == 0 ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
-
+
// Find new predicate for integer comparison.
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
switch (Compare->getPredicate()) {
@@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
case CmpInst::FCMP_OLE:
case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
}
-
+
// We convert the floating point induction variable to a signed i32 value if
// we can. This is only safe if the comparison will not overflow in a way
// that won't be trapped by the integer equivalent operations. Check for this
// now.
// TODO: We could use i64 if it is native and the range requires it.
-
+
// The start/stride/exit values must all fit in signed i32.
if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
return;
@@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (InitValue >= ExitValue ||
NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
return;
-
+
uint32_t Range = uint32_t(ExitValue-InitValue);
if (NewPred == CmpInst::ICMP_SLE) {
// Normalize SLE -> SLT, check for infinite loop.
if (++Range == 0) return; // Range overflows.
}
-
+
unsigned Leftover = Range % uint32_t(IncValue);
-
+
// If this is an equality comparison, we require that the strided value
// exactly land on the exit value, otherwise the IV condition will wrap
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
return;
-
+
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
return;
-
+
} else {
// If we have a negative stride, we require the init to be greater than the
// exit value and an equality or greater than comparison.
if (InitValue >= ExitValue ||
NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
return;
-
+
uint32_t Range = uint32_t(InitValue-ExitValue);
if (NewPred == CmpInst::ICMP_SGE) {
// Normalize SGE -> SGT, check for infinite loop.
if (++Range == 0) return; // Range overflows.
}
-
+
unsigned Leftover = Range % uint32_t(-IncValue);
-
+
// If this is an equality comparison, we require that the strided value
// exactly land on the exit value, otherwise the IV condition will wrap
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
return;
-
+
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
return;
}
-
+
const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
// Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
PN->getIncomingBlock(IncomingEdge));
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 90094a8da257..7168177a76b4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,6 +16,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
@@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) {
Changed = true;
continue;
}
-
+
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
-
+
// Can't thread an unconditional jump, but if the block is "almost
// empty", we can replace uses of it with uses of the successor and make
// this dead.
@@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
static bool hasAddressTakenAndUsed(BasicBlock *BB) {
if (!BB->hasAddressTaken()) return false;
-
+
// If the block has its address taken, it may be a tree of dead constants
// hanging off of it. These shouldn't keep the block alive.
BlockAddress *BA = BlockAddress::get(BB);
@@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return false; // Must be an invoke.
}
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
+
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
@@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
// Create a PHI node at the start of the block for the PRE'd load value.
- PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
PN->takeName(LI);
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
- ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 07867933d08c..93de9cf002eb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) {
// enough that we handle it as a special (more efficient) case. It is more
// efficient to handle because there are no PHI nodes that need to be placed.
if (ExitBlocks.size() == 1) {
- if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
+ if (!isa<DbgInfoIntrinsic>(I) &&
+ !DT->dominates(I.getParent(), ExitBlocks[0])) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
@@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
Preheader->getTerminator());
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- // Copy any value stored to or loaded from a must-alias of the pointer.
- if (PreheaderLoad->getType()->isPointerTy()) {
- Value *SomeValue;
- if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
- SomeValue = LI;
- else
- SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
-
- CurAST->copyValue(SomeValue, PreheaderLoad);
- }
-
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
-
- // If the preheader load is itself a pointer, we need to tell alias analysis
- // about the new pointer we created in the preheader block and about any PHI
- // nodes that just got inserted.
- if (PreheaderLoad->getType()->isPointerTy()) {
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
- }
-
- // fwew, we're done!
+
+ // If the SSAUpdater didn't use the load in the preheader, just zap it now.
+ if (PreheaderLoad->use_empty())
+ PreheaderLoad->eraseFromParent();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f8ce214750ac..1366231e9a1a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -81,7 +81,7 @@ namespace {
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
-
+
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment,
Value *SplatValue, Instruction *TheStore,
@@ -91,7 +91,7 @@ namespace {
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
const SCEV *BECount);
-
+
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
@@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
///
static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
SmallVector<Instruction*, 32> NowDeadInsts;
-
+
NowDeadInsts.push_back(I);
-
+
// Before we touch this instruction, remove it from SE!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
+
// This instruction is dead, zap it, in stages. Start by removing it from
// SCEV.
SE.forgetValue(DeadInst);
-
+
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, 0);
-
+
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
-
+
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI))
NowDeadInsts.push_back(OpI);
}
-
+
DeadInst->eraseFromParent();
-
+
} while (!NowDeadInsts.empty());
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
-
+
// The trip count of the loop must be analyzable.
SE = &getAnalysis<ScalarEvolution>();
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
return false;
const SCEV *BECount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BECount)) return false;
-
+
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
if (BECst->getValue()->getValue() == 0)
return false;
-
+
// We require target data for now.
TD = getAnalysisIfAvailable<TargetData>();
if (TD == 0) return false;
@@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTree>();
LoopInfo &LI = getAnalysis<LoopInfo>();
TLI = &getAnalysis<TargetLibraryInfo>();
-
+
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
DEBUG(dbgs() << "loop-idiom Scanning: F["
<< L->getHeader()->getParent()->getName()
<< "] Loop %" << L->getHeader()->getName() << "\n");
-
+
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
@@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
// Ignore blocks in subloops.
if (LI.getLoopFor(*BI) != CurLoop)
continue;
-
+
MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
}
return MadeChange;
@@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
if (!DT->dominates(BB, ExitBlocks[i]))
return false;
-
+
bool MadeChange = false;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
@@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
WeakVH InstPtr(I);
if (!processLoopStore(SI, BECount)) continue;
MadeChange = true;
-
+
// If processing the store invalidated our iterator, start over from the
// top of the block.
if (InstPtr == 0)
I = BB->begin();
continue;
}
-
+
// Look for memset instructions, which may be optimized to a larger memset.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
WeakVH InstPtr(I);
if (!processLoopMemSet(MSI, BECount)) continue;
MadeChange = true;
-
+
// If processing the memset invalidated our iterator, start over from the
// top of the block.
if (InstPtr == 0)
@@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
continue;
}
}
-
+
return MadeChange;
}
@@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
-
+
// Reject stores that are so large that they overflow an unsigned.
uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
-
+
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
@@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-
+
if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
// TODO: Could also handle negative stride here someday, that will require
// the validity check in mayLoopAccessLocation to be updated though.
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
dbgs() << "BB: " << *SI->getParent();
}
-
+
return false;
}
@@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// If we're not allowed to hack on memset, we fail.
if (!TLI->has(LibFunc::memset))
return false;
-
+
Value *Pointer = MSI->getDest();
-
+
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
@@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if ((SizeInBytes >> 32) != 0)
return false;
-
+
// Check to see if the stride matches the size of the memset. If so, then we
// know that every byte is touched in the loop.
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
-
+
// TODO: Could also handle negative stride here someday, that will require the
// validity check in mayLoopAccessLocation to be updated though.
if (Stride == 0 || MSI->getLength() != Stride->getValue())
return false;
-
+
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
MSI->getAlignment(), MSI->getValue(),
MSI, Ev, BECount);
@@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
-
+
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
// with store of &A[100], we need to StoreLoc to be "A" with size of 100,
@@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
if (C == 0) return 0;
-
+
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = TD.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
return 0;
-
+
// Don't care enough about darwin/ppc to implement this.
if (TD.isBigEndian())
return 0;
@@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
if (Size > 16) return 0;
-
+
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;
@@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment, Value *StoredVal,
Instruction *TheStore, const SCEVAddRecExpr *Ev,
const SCEV *BECount) {
-
+
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = 0;
-
+
// If we're allowed to form a memset, and the stored value would be acceptable
// for memset, use it.
if (SplatValue && TLI->has(LibFunc::memset) &&
@@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// do anything with a 3-byte store, for example.
return false;
}
-
-
+
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop, BECount,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
return false;
-
+
// Okay, everything looks good, insert the memset.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
+
IRBuilder<> Builder(Preheader->getTerminator());
-
+
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
// header. Just insert code for it in the preheader.
SCEVExpander Expander(*SE);
-
+
unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
- Value *BasePtr =
+ Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
-
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-
+
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- true /*no unsigned overflow*/);
+ SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- true /*no unsigned overflow*/);
-
- Value *NumBytes =
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
-
- Value *NewCall;
+
+ CallInst *NewCall;
if (SplatValue)
NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
else {
Module *M = TheStore->getParent()->getParent()->getParent();
Value *MSP = M->getOrInsertFunction("memset_pattern16",
Builder.getVoidTy(),
- Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(),
Builder.getInt8PtrTy(), IntPtr,
(void*)0);
-
+
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
@@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
}
-
+
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
<< " from store to: " << *Ev << " at: " << *TheStore << "\n");
- (void)NewCall;
-
+ NewCall->setDebugLoc(TheStore->getDebugLoc());
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
DeleteDeadInstruction(TheStore, *SE);
@@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
-
+
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
CurLoop, BECount, StoreSize,
getAnalysis<AliasAnalysis>(), SI))
return false;
-
+
// Okay, everything looks good, insert the memcpy.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
+
IRBuilder<> Builder(Preheader->getTerminator());
-
+
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
// header. Just insert code for it in the preheader.
SCEVExpander Expander(*SE);
- Value *LoadBasePtr =
+ Value *LoadBasePtr =
Expander.expandCodeFor(LoadEv->getStart(),
Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
Preheader->getTerminator());
- Value *StoreBasePtr =
+ Value *StoreBasePtr =
Expander.expandCodeFor(StoreEv->getStart(),
Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
Preheader->getTerminator());
-
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
const Type *IntPtr = TD->getIntPtrType(SI->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-
+
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- true /*no unsigned overflow*/);
+ SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- true /*no unsigned overflow*/);
-
+ SCEV::FlagNUW);
+
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
-
+
Value *NewCall =
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
std::min(SI->getAlignment(), LI->getAlignment()));
-
+
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
(void)NewCall;
-
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
DeleteDeadInstruction(SI, *SE);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 95e15784df2c..47dced37c3a4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
BasicBlock *OrigLatch = L->getLoopLatch();
- assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (OrigPreheader == 0 || OrigLatch == 0)
+ return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
@@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
// We can fold the conditional branch in the preheader, this makes things
// simpler. The first step is to remove the extra edge to the Exit block.
Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
- BranchInst::Create(NewHeader, PHBI);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ac4aea2e404e..5abc79042390 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()),
+ // FIXME: AR->getNoWrapFlags()
+ AR->getLoop(), SCEV::FlagAnyWrap),
L, Good, Bad, SE);
return;
}
@@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
if (!Start) return 0;
- return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ // FlagNW is independent of the start value, step direction, and is
+ // preserved with smaller magnitude steps.
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
return 0;
}
@@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
return Result;
}
return 0;
@@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
return Result;
}
return 0;
@@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
- case Intrinsic::x86_sse2_loadu_dq:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
@@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F,
}
}
-/// Loose - Set this cost to a loosing value.
+/// Loose - Set this cost to a losing value.
void Cost::Loose() {
NumRegs = ~0u;
AddRecCost = ~0u;
@@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() {
if (!C->getValue().isStrictlyPositive()) continue;
/* Add new PHINode. */
- PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+ PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
@@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() {
}
}
-/// reconcileNewOffset - Determine if the given use can accomodate a fixup
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
/// at the given offset and other details. If so, update the use and
/// return true.
bool
@@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap),
C, Ops, L, SE);
CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
@@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
}
}
-/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 80b263a30cb8..fef6bc31c7b6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -43,7 +43,13 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(ID) {
+ LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
+ CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+
+ UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -56,7 +62,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ unsigned CurrentCount;
unsigned CurrentThreshold;
+ bool CurrentAllowPartial;
+ bool UserThreshold; // CurrentThreshold is user-specified.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
+ return new LoopUnroll(Threshold, Count, AllowPartial);
+}
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
@@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
- CurrentThreshold = UnrollThreshold;
- if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
- UnrollThreshold.getNumOccurrences() == 0)
- CurrentThreshold = OptSizeUnrollThreshold;
+ unsigned Threshold = CurrentThreshold;
+ if (!UserThreshold &&
+ Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Threshold = OptSizeUnrollThreshold;
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = UnrollCount;
+ unsigned Count = CurrentCount;
// Automatically select an unroll count.
if (Count == 0) {
@@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Enforce the threshold.
- if (CurrentThreshold != NoThreshold) {
+ if (Threshold != NoThreshold) {
unsigned NumInlineCandidates;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
@@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > CurrentThreshold) {
+ if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << CurrentThreshold << "\n");
- if (!UnrollAllowPartial) {
+ << " because size: " << Size << ">" << Threshold << "\n");
+ if (!CurrentAllowPartial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
// Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = CurrentThreshold / LoopSize;
+ Count = Threshold / LoopSize;
while (Count != 0 && TripCount%Count != 0) {
Count--;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index bde0e5316c3a..a3035cbfb0ee 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <list>
using namespace llvm;
@@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
+ TargetLibraryInfo *TLI;
const TargetData *TD;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = 0;
+ TLI = 0;
+ TD = 0;
}
bool runOnFunction(Function &F);
@@ -316,6 +320,7 @@ namespace {
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
@@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
if (M->getSource() == MDep->getSource())
return false;
- // Second, the length of the memcpy's must be the same, or the preceeding one
+ // Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
@@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (!TLI->has(LibFunc::memmove))
+ return false;
+
// See if the pointers alias.
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
@@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // If we don't have at least memset and memcpy, there is little point of doing
+ // anything here. These are required by a freestanding implementation, so if
+ // even they are disabled, there is no point in trying hard.
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ return false;
+
while (1) {
if (!iterateOnFunction(F))
break;
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index e093b52571af..c1dfe154ae3f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -74,6 +75,8 @@ namespace {
class Reassociate : public FunctionPass {
DenseMap<BasicBlock*, unsigned> RankMap;
DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ SmallVector<WeakVH, 8> RedoInsts;
+ SmallVector<WeakVH, 8> DeadInsts;
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
@@ -98,7 +101,7 @@ namespace {
void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
void LinearizeExpr(BinaryOperator *I);
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
- void ReassociateBB(BasicBlock *BB);
+ void ReassociateInst(BasicBlock::iterator &BBI);
void RemoveDeadBinaryOp(Value *V);
};
@@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
void Reassociate::RemoveDeadBinaryOp(Value *V) {
Instruction *Op = dyn_cast<Instruction>(V);
- if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty())
+ if (!Op || !isa<BinaryOperator>(Op))
return;
Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
ValueRankMap.erase(Op);
- Op->eraseFromParent();
+ DeadInsts.push_back(Op);
RemoveDeadBinaryOp(LHS);
RemoveDeadBinaryOp(RHS);
}
@@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg,
ValueRankMap.erase(Neg);
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
+ Res->setDebugLoc(Neg->getDebugLoc());
Neg->eraseFromParent();
return Res;
}
@@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
// Everyone now refers to the add instruction.
ValueRankMap.erase(Sub);
Sub->replaceAllUsesWith(New);
+ New->setDebugLoc(Sub->getDebugLoc());
Sub->eraseFromParent();
DEBUG(dbgs() << "Negated: " << *New << '\n');
@@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl,
ValueRankMap.erase(Shl);
Mul->takeName(Shl);
Shl->replaceAllUsesWith(Mul);
+ Mul->setDebugLoc(Shl->getDebugLoc());
Shl->eraseFromParent();
return Mul;
}
@@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
// remaining operand.
if (Factors.size() == 1) {
ValueRankMap.erase(BO);
- BO->eraseFromParent();
+ DeadInsts.push_back(BO);
V = Factors[0].Op;
} else {
RewriteExprTree(BO, Factors);
@@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
- Mul = ReassociateExpression(cast<BinaryOperator>(Mul));
+ RedoInsts.push_back(Mul);
// If every add operand was a duplicate, return the multiply.
if (Ops.empty())
@@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
}
-/// ReassociateBB - Inspect all of the instructions in this basic block,
-/// reassociating them as we go.
-void Reassociate::ReassociateBB(BasicBlock *BB) {
- for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
- Instruction *BI = BBI++;
- if (BI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
- MadeChange = true;
- BI = NI;
- }
+/// ReassociateInst - Inspect and reassociate the instruction at the
+/// given position, post-incrementing the position.
+void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
+ Instruction *BI = BBI++;
+ if (BI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BI->getOperand(1)))
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ MadeChange = true;
+ BI = NI;
+ }
- // Reject cases where it is pointless to do this.
- if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
- BI->getType()->isVectorTy())
- continue; // Floating point ops are not associative.
-
- // Do not reassociate boolean (i1) expressions. We want to preserve the
- // original order of evaluation for short-circuited comparisons that
- // SimplifyCFG has folded to AND/OR expressions. If the expression
- // is not further optimized, it is likely to be transformed back to a
- // short-circuited form for code gen, and the source order may have been
- // optimized for the most likely conditions.
- if (BI->getType()->isIntegerTy(1))
- continue;
+ // Reject cases where it is pointless to do this.
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
+ BI->getType()->isVectorTy())
+ return; // Floating point ops are not associative.
+
+ // Do not reassociate boolean (i1) expressions. We want to preserve the
+ // original order of evaluation for short-circuited comparisons that
+ // SimplifyCFG has folded to AND/OR expressions. If the expression
+ // is not further optimized, it is likely to be transformed back to a
+ // short-circuited form for code gen, and the source order may have been
+ // optimized for the most likely conditions.
+ if (BI->getType()->isIntegerTy(1))
+ return;
- // If this is a subtract instruction which is not already in negate form,
- // see if we can convert it to X+-Y.
- if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(BI)) {
- BI = BreakUpSubtract(BI, ValueRankMap);
- // Reset the BBI iterator in case BreakUpSubtract changed the
- // instruction it points to.
- BBI = BI;
- ++BBI;
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (BI->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
+ // Reset the BBI iterator in case BreakUpSubtract changed the
+ // instruction it points to.
+ BBI = BI;
+ ++BBI;
+ MadeChange = true;
+ } else if (BinaryOperator::isNeg(BI)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+ (!BI->hasOneUse() ||
+ !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
MadeChange = true;
- } else if (BinaryOperator::isNeg(BI)) {
- // Otherwise, this is a negation. See if the operand is a multiply tree
- // and if this is not an inner node of a multiply tree.
- if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
- (!BI->hasOneUse() ||
- !isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap);
- MadeChange = true;
- }
}
}
+ }
- // If this instruction is a commutative binary operator, process it.
- if (!BI->isAssociative()) continue;
- BinaryOperator *I = cast<BinaryOperator>(BI);
+ // If this instruction is a commutative binary operator, process it.
+ if (!BI->isAssociative()) return;
+ BinaryOperator *I = cast<BinaryOperator>(BI);
- // If this is an interior node of a reassociable tree, ignore it until we
- // get to the root of the tree, to avoid N^2 analysis.
- if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
- continue;
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ return;
- // If this is an add tree that is used by a sub instruction, ignore it
- // until we process the subtract.
- if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
- cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
- continue;
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ return;
- ReassociateExpression(I);
- }
+ ReassociateExpression(I);
}
Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
@@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
// eliminate it.
DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ VI->setDebugLoc(I->getDebugLoc());
RemoveDeadBinaryOp(I);
++NumAnnihil;
return V;
@@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
+ if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
+ OI->setDebugLoc(I->getDebugLoc());
RemoveDeadBinaryOp(I);
return Ops[0].Op;
}
@@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) {
MadeChange = false;
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- ReassociateBB(FI);
+ for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
+ ReassociateInst(BBI);
+
+ // Now that we're done, revisit any instructions which are likely to
+ // have secondary reassociation opportunities.
+ while (!RedoInsts.empty())
+ if (Value *V = RedoInsts.pop_back_val()) {
+ BasicBlock::iterator BBI = cast<Instruction>(V);
+ ReassociateInst(BBI);
+ }
+
+ // Now that we're done, delete any instructions which are no longer used.
+ while (!DeadInsts.empty())
+ if (Value *V = DeadInsts.pop_back_val())
+ RecursivelyDeleteTriviallyDeadInstructions(V);
// We are done with the rank map.
RankMap.clear();
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 459bb0621f88..47afc770bb0c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -9,7 +9,7 @@
//
// This file demotes all registers to memory references. It is intented to be
// the inverse of PromoteMemoryToRegister. By converting to loads, the only
-// values live accross basic blocks are allocas and loads before phi nodes.
+// values live across basic blocks are allocas and loads before phi nodes.
// It is intended that this should make CFG hacking much easier.
// To make later hacking easier, the entry block is split into two, such that
// all introduced allocas and nothing else are in the entry block.
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index c82e929b364e..db8eb850448f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) {
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
- // If we infered constant or undef values for globals variables, we can delete
+ // If we inferred constant or undef values for globals variables, we can delete
// the global and any stores that remain to it.
const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index bf9ca6d803b6..32a050617432 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -17,6 +17,7 @@
#include "llvm-c/Initialization.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Scalar.h"
@@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDCEPass(Registry);
initializeDeadInstEliminationPass(Registry);
initializeDSEPass(Registry);
- initializeGEPSplitterPass(Registry);
initializeGVNPass(Registry);
initializeEarlyCSEPass(Registry);
initializeIndVarSimplifyPass(Registry);
@@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
- initializeSimplifyHalfPowrLibCallsPass(Registry);
initializeSimplifyLibCallsPass(Registry);
initializeSinkingPass(Registry);
initializeTailDupPass(Registry);
@@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopDeletionPass());
}
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIdiomPass());
+}
+
void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
@@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createScalarReplAggregatesPass());
}
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+}
+
void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
int Threshold) {
unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
@@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
}
+
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCorrelatedValuePropagationPass());
+}
+
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createEarlyCSEPass());
+}
+
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+}
+
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBasicAliasAnalysisPass());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index c3ca85280ee7..8178c2707599 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -219,7 +219,7 @@ namespace {
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
- /// AllocaSize - The size of the alloca being considered.
+ /// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
@@ -238,19 +238,22 @@ class ConvertToScalarInfo {
/// also declared as a vector, we do want to promote to a vector.
bool HadAVector;
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td) {
- IsNotTrivial = false;
- VectorTy = 0;
- HadAVector = false;
- }
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
+ HadAVector(false), HadNonMemTransferAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInType(const Type *In, uint64_t Offset);
+ void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+ bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
@@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
} else {
+ unsigned BitWidth = AllocaSize * 8;
+ if (!HadAVector && !HadNonMemTransferAccess &&
+ !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
- NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
@@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+ bool IsLoadOrStore) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
@@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (VectorTy == 0)
- VectorTy = VInTy;
+ if (MergeInVectorType(VInTy, Offset))
return;
- }
} else if (In->isFloatTy() || In->isDoubleTy() ||
(In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (IsLoadOrStore && EltSize == AllocaSize)
+ return;
+
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
- unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (VectorTy == 0 ||
- cast<VectorType>(VectorTy)->getElementType()
- ->getPrimitiveSizeInBits()/8 == EltSize)) {
- if (VectorTy == 0)
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) {
+ if (!VectorTy) {
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
+ return;
+ }
+
+ unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+ ->getPrimitiveSizeInBits()/8;
+ if (EltSize == CurrentEltSize)
+ return;
+
+ if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
+ return;
}
}
@@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
VectorTy = Type::getVoidTy(In->getContext());
}
+/// MergeInVectorType - Handles the vector case of MergeInType, returning true
+/// if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+ uint64_t Offset) {
+ // Remember if we saw a vector type.
+ HadAVector = true;
+
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+ return false;
+
+ // If this the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ VectorTy = VInTy;
+ return true;
+ }
+
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ return true;
+
+ const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+ const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+ unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (BitWidth < 8 || InBitWidth < 8 ||
+ BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
+}
+
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
@@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
return false;
- MergeInType(LI->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(LI->getType(), Offset, true);
continue;
}
@@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
- MergeInType(SI->getOperand(0)->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(SI->getOperand(0)->getType(), Offset, true);
continue;
}
@@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
@@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
!isa<ConstantInt>(MSI->getLength()))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
@@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
}
}
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input types must be integer or floating-point
+/// scalar or vector types, and the resulting type is an integer, float or
+/// double.
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
+ unsigned NewBitWidth) {
+ bool IsFP1 = Ty1->isFloatingPointTy() ||
+ (Ty1->isVectorTy() &&
+ cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
+ bool IsFP2 = Ty2->isFloatingPointTy() ||
+ (Ty2->isVectorTy() &&
+ cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
+
+ LLVMContext &Context = Ty1->getContext();
+
+ // Prefer floating-point types over integer types, as integer types may have
+ // been created by earlier scalar replacement.
+ if (IsFP1 || IsFP2) {
+ if (NewBitWidth == 32)
+ return Type::getFloatTy(Context);
+ if (NewBitWidth == 64)
+ return Type::getDoubleTy(Context);
+ }
+
+ return Type::getIntNTy(Context, NewBitWidth);
+}
+
+/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
+/// to another vector of the same element type which has the same allocation
+/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
+static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
+ IRBuilder<> &Builder) {
+ const Type *FromType = FromVal->getType();
+ const VectorType *FromVTy = cast<VectorType>(FromType);
+ const VectorType *ToVTy = cast<VectorType>(ToType);
+ assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
+ "Vectors must have the same element type");
+ Value *UnV = UndefValue::get(FromType);
+ unsigned numEltsFrom = FromVTy->getNumElements();
+ unsigned numEltsTo = ToVTy->getNumElements();
+
+ SmallVector<Constant*, 3> Args;
+ const Type* Int32Ty = Builder.getInt32Ty();
+ unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+ unsigned i;
+ for (i=0; i != minNumElts; ++i)
+ Args.push_back(ConstantInt::get(Int32Ty, i));
+
+ if (i < numEltsTo) {
+ Constant* UnC = UndefValue::get(Int32Ty);
+ for (; i != numEltsTo; ++i)
+ Args.push_back(UnC);
+ }
+ Constant *Mask = ConstantVector::get(Args);
+ return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+}
+
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
@@ -589,14 +734,46 @@ Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
- if (FromVal->getType() == ToType && Offset == 0)
+ const Type *FromType = FromVal->getType();
+ if (FromType == ToType && Offset == 0)
return FromVal;
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (ToTypeSize == AllocaSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, it is two vectors with the same element type that has
+ // the same allocation size but different number of elements so use
+ // a shuffle vector.
+ if (FromType->getPrimitiveSizeInBits() ==
+ ToType->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ else
+ return CreateShuffleVectorCast(FromVal, ToType, Builder);
+ }
+
+ if (isPowerOf2_64(AllocaSize / ToTypeSize)) {
+ assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
+ "of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(FromType, ToType,
+ ToTypeSize * 8);
+ unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize)
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ if (ValSize == VecSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, it is two vectors with the same element type that has
+ // the same allocation size but different number of elements so use
+ // a shuffle vector.
+ if (VTy->getPrimitiveSizeInBits() ==
+ SV->getType()->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ else
+ return CreateShuffleVectorCast(SV, VTy, Builder);
+ }
- uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ if (isPowerOf2_64(VecSize / ValSize)) {
+ assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
+ "value of a smaller vector type at a nonzero offset.");
- // Must be an element insertion.
- unsigned Elt = Offset/EltSize;
+ const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
+ ValSize);
+ unsigned NumCastVectorElements = VecSize / ValSize;
- if (SV->getType() != VTy->getElementType())
- SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
- SV = Builder.CreateInsertElement(Old, SV,
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
+ // Must be an element insertion.
+ assert(SV->getType() == VTy->getElementType());
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned Elt = Offset/EltSize;
+ return Builder.CreateInsertElement(Old, SV,
ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
- return SV;
}
// If SV is a first-class aggregate value, insert each value recursively.
@@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
}
const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
- PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+ PN->getName()+".ld", PN);
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ, it doesn't matter.
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ce5dd73ace32..1137c2b23f96 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
if (UseLLVMTrap) {
Function *TrapFn =
Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst::Create(TrapFn, "", I);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
}
new UnreachableInst(I->getContext(), I);
@@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
if (RetBlockPHI == 0) {
Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
- RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
+ pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+ std::distance(PB, PE), "merge",
&RetBlock->front());
- for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock);
- PI != E; ++PI)
+ for (pred_iterator PI = PB; PI != PE; ++PI)
RetBlockPHI->addIncoming(InVal, *PI);
RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
deleted file mode 100644
index 70ff32e02310..000000000000
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple pass that applies an experimental
-// transformation on calls to specific functions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplify-libcalls-halfpowr"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-namespace {
- /// This pass optimizes well half_powr function calls.
- ///
- class SimplifyHalfPowrLibCalls : public FunctionPass {
- const TargetData *TD;
- public:
- static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
- initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- }
-
- Instruction *
- InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt);
- };
- char SimplifyHalfPowrLibCalls::ID = 0;
-} // end anonymous namespace.
-
-INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false)
-
-// Public interface to the Simplify HalfPowr LibCalls pass.
-FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
- return new SimplifyHalfPowrLibCalls();
-}
-
-/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
-/// their control flow to better facilitate subsequent optimization.
-Instruction *
-SimplifyHalfPowrLibCalls::
-InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt) {
- std::vector<BasicBlock *> Bodies;
- BasicBlock *NewBlock = 0;
-
- for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
- CallInst *Call = cast<CallInst>(HalfPowrs[i]);
- Function *Callee = Call->getCalledFunction();
-
- // Minimally sanity-check the CFG of half_powr to ensure that it contains
- // the kind of code we expect. If we're running this pass, we have
- // reason to believe it will be what we expect.
- Function::iterator I = Callee->begin();
- BasicBlock *Prologue = I++;
- if (I == Callee->end()) break;
- BasicBlock *SubnormalHandling = I++;
- if (I == Callee->end()) break;
- BasicBlock *Body = I++;
- if (I != Callee->end()) break;
- if (SubnormalHandling->getSinglePredecessor() != Prologue)
- break;
- BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
- if (!PBI || !PBI->isConditional())
- break;
- BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
- if (!SNBI || SNBI->isConditional())
- break;
- if (!isa<ReturnInst>(Body->getTerminator()))
- break;
-
- Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
-
- // Inline the call, taking care of what code ends up where.
- NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
-
- InlineFunctionInfo IFI(0, TD);
- bool B = InlineFunction(Call, IFI);
- assert(B && "half_powr didn't inline?");
- (void)B;
-
- BasicBlock *NewBody = NewBlock->getSinglePredecessor();
- assert(NewBody);
- Bodies.push_back(NewBody);
- }
-
- if (!NewBlock)
- return InsertPt;
-
- // Put the code for all the bodies into one block, to facilitate
- // subsequent optimization.
- (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
- for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
- BasicBlock *Body = Bodies[i];
- Instruction *FNP = Body->getFirstNonPHI();
- // Splice the insts from body into NewBlock.
- NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
- FNP, Body->getTerminator());
- }
-
- return NewBlock->begin();
-}
-
-/// runOnFunction - Top level algorithm.
-///
-bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
-
- bool Changed = false;
- std::vector<Instruction *> HalfPowrs;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- // Look for calls.
- bool IsHalfPowr = false;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Look for direct calls and calls to non-external functions.
- Function *Callee = CI->getCalledFunction();
- if (Callee && Callee->hasExternalLinkage()) {
- // Look for calls with well-known names.
- if (Callee->getName() == "__half_powrf4")
- IsHalfPowr = true;
- }
- }
- if (IsHalfPowr)
- HalfPowrs.push_back(I);
- // We're looking for sequences of up to three such calls, which we'll
- // simplify as a group.
- if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
- I = InlineHalfPowrs(HalfPowrs, I);
- E = I->getParent()->end();
- HalfPowrs.clear();
- Changed = true;
- }
- }
- assert(HalfPowrs.empty() && "Block had no terminator!");
- }
-
- return Changed;
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 9f136d4e3077..6247b0348f14 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -49,6 +49,7 @@ class LibCallOptimization {
protected:
Function *Caller;
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
LLVMContext* Context;
public:
LibCallOptimization() { }
@@ -62,9 +63,11 @@ public:
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
- Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
this->TD = TD;
+ this->TLI = TLI;
if (CI->getCalledFunction())
Context = &CI->getCalledFunction()->getContext();
@@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
return true;
}
+
+static bool CallHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
/// comparisons with With.
@@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization {
// 'printf' Optimizations
struct PrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
@@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
// 'sprintf' Optimizations
struct SPrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed pointer arguments and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
@@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization {
// 'fprintf' Optimizations
struct FPrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed paramters as pointers and integer result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// All the optimizations depend on the format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed parameters as pointers and integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
@@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
// Set the builder to the instruction after the call.
Builder.SetInsertPoint(BB, I);
+ // Use debug location of CI for all new instructions.
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
// Try to optimize this call.
- Value *Result = LCO->OptimizeCall(CI, TD, Builder);
+ Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
if (Result == 0) continue;
DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 5b6bc04cc1c2..539cc6f0baf5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -36,7 +36,7 @@
// evaluated each time through the tail recursion. Safely keeping allocas
// in the entry block requires analysis to proves that the tail-called
// function does not read or write the stack object.
-// 2. Tail recursion is only performed if the call immediately preceeds the
+// 2. Tail recursion is only performed if the call immediately precedes the
// return instruction. It's possible that there could be a jump between
// the call and the return.
// 3. There can be intervening operations between the call and the return that
@@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
if (CanMoveAboveCall(BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
+ // is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
@@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
Instruction *InsertPos = OldEntry->begin();
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I) {
- PHINode *PN = PHINode::Create(I->getType(),
+ PHINode *PN = PHINode::Create(I->getType(), 2,
I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
PN->addIncoming(I, NewEntry);
@@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
if (AccumulatorRecursionEliminationInitVal) {
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
+ pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
PHINode *AccPN =
PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1,
"accumulator.tr", OldEntry->begin());
// Loop over all of the predecessors of the tail recursion block. For the
@@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// other tail recursions eliminated) the accumulator is not modified.
// Because we haven't added the branch in the current block to OldEntry yet,
// it will not show up as a predecessor.
- for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
- PI != PE; ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
if (P == &F->getEntryBlock())
AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
@@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// Now that all of the PHI nodes are in place, remove the call and
// ret instructions, replacing them with an unconditional branch.
- BranchInst::Create(OldEntry, Ret);
+ BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+ NewBI->setDebugLoc(CI->getDebugLoc());
+
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
++NumEliminated;
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index acaea195e710..c705cc51094a 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -447,7 +447,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block
PHINode *NewPHI =
- PHINode::Create(PN->getType(), PN->getName()+".ph", BI);
+ PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI);
if (AA) AA->copyValue(PN, NewPHI);
// Move all of the PHI values for 'Preds' to the new PHI.
@@ -538,3 +538,15 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
UncondBranch->eraseFromParent();
return cast<ReturnInst>(NewRet);
}
+
+/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a
+/// given basic block.
+DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) {
+ for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown())
+ return DL;
+ }
+ return DebugLoc();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 616b066b5ab1..caf2aeb4d30a 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -56,7 +56,7 @@ char BreakCriticalEdges::ID = 0;
INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
"Break critical edges in CFG", false, false)
-// Publically exposed interface to pass...
+// Publicly exposed interface to pass...
char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
@@ -140,7 +140,7 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
if (VP->getParent() == SplitBB)
continue;
// Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN = PHINode::Create(PN->getType(), "split",
+ PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split",
SplitBB->getTerminator());
for (unsigned i = 0, e = Preds.size(); i != e; ++i)
NewPN->addIncoming(V, Preds[i]);
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index e6337722c8bd..8c133ea7f560 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -104,7 +104,7 @@ namespace {
/// region, we need to split the entry block of the region so that the PHI node
/// is easier to deal with.
void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
- bool HasPredsFromRegion = false;
+ unsigned NumPredsFromRegion = 0;
unsigned NumPredsOutsideRegion = 0;
if (Header != &Header->getParent()->getEntryBlock()) {
@@ -116,7 +116,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// header block into two.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (BlocksToExtract.count(PN->getIncomingBlock(i)))
- HasPredsFromRegion = true;
+ ++NumPredsFromRegion;
else
++NumPredsOutsideRegion;
@@ -147,7 +147,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// Okay, now we need to adjust the PHI nodes and any branches from within the
// region to go to the new header block instead of the old header block.
- if (HasPredsFromRegion) {
+ if (NumPredsFromRegion) {
PHINode *PN = cast<PHINode>(OldPred->begin());
// Loop over all of the predecessors of OldPred that are in the region,
// changing them to branch to NewBB instead.
@@ -157,14 +157,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
TI->replaceUsesOfWith(OldPred, NewBB);
}
- // Okay, everthing within the region is now branching to the right block, we
+ // Okay, everything within the region is now branching to the right block, we
// just have to update the PHI nodes now, inserting PHI nodes into NewBB.
for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
PHINode *PN = cast<PHINode>(AfterPHIs);
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
- PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce",
- NewBB->begin());
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName()+".ce", NewBB->begin());
NewPN->addIncoming(PN, OldPred);
// Loop over all of the incoming value in PN, moving them to NewPN if they
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index c1faf2411331..7d179092c063 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -320,7 +320,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
//
// Note that this only does one level of inlining. For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
-// exists in the instruction stream. Similiarly this will inline a recursive
+// exists in the instruction stream. Similarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
@@ -624,7 +624,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
if (!TheCall->use_empty()) {
- PHI = PHINode::Create(RTy, TheCall->getName(),
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
AfterCallBB->begin());
// Anything that used the result of the function call should now use the
// PHI node as their operand.
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index b2e5fa6d7e3a..b654111eba74 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -207,6 +207,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
DomTreeNode *DomNode = DT->getNode(DomBB);
+ SmallVector<PHINode*, 16> AddedPHIs;
+
SSAUpdater SSAUpdate;
SSAUpdate.Initialize(Inst->getType(), Inst->getName());
@@ -220,9 +222,10 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// If we already inserted something for this BB, don't reprocess it.
if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
- PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+ PHINode *PN = PHINode::Create(Inst->getType(),
+ PredCache.GetNumPreds(ExitBB),
+ Inst->getName()+".lcssa",
ExitBB->begin());
- PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
// Add inputs from inside the loop for this PHI.
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
@@ -236,6 +239,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
&PN->getOperandUse(
PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
}
+
+ AddedPHIs.push_back(PN);
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
@@ -262,6 +267,12 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}
+
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
+ if (AddedPHIs[i]->use_empty())
+ AddedPHIs[i]->eraseFromParent();
+ }
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 3f789fa86589..4bca2fc1fb9d 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -20,8 +20,11 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -65,8 +68,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
// Let the basic block know that we are letting go of it. Based on this,
// it will adjust it's PHI nodes.
- assert(BI->getParent() && "Terminator not inserted in block!");
- OldDest->removePredecessor(BI->getParent());
+ OldDest->removePredecessor(BB);
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Destination, BI);
@@ -209,8 +211,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
bool llvm::isInstructionTriviallyDead(Instruction *I) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
- // We don't want debug info removed by anything this general.
- if (isa<DbgInfoIntrinsic>(I)) return false;
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->getValue())
+ return false;
+ return true;
+ }
if (!I->mayHaveSideEffects()) return true;
@@ -320,8 +332,14 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
BI = BB->begin();
continue;
}
-
+
+ if (Inst->isTerminator())
+ break;
+
+ WeakVH BIHandle(BI);
MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ if (BIHandle != BI)
+ BI = BB->begin();
}
return MadeChange;
}
@@ -632,6 +650,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
}
+ // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+ Hash >>= 1;
// If we've never seen this hash value before, it's a unique PHI.
std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
HashMap.insert(std::make_pair(Hash, PN));
@@ -753,3 +773,83 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
return Align;
}
+///===---------------------------------------------------------------------===//
+/// Dbg Intrinsic utilities
+///
+
+/// Inserts a llvm.dbg.value intrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ StoreInst *SI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal =
+ Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0,
+ DIVar, SI);
+
+ // Propagate any debug metadata from the store onto the dbg.value.
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before the loads of an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ LoadInst *LI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal =
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVar, LI);
+
+ // Propagate any debug metadata from the load onto the dbg.value.
+ DebugLoc LIDL = LI->getDebugLoc();
+ if (!LIDL.isUnknown())
+ DbgVal->setDebugLoc(LIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) {
+ DIBuilder DIB(*F.getParent());
+ SmallVector<DbgDeclareInst *, 4> Dbgs;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ Dbgs.push_back(DDI);
+ }
+ if (Dbgs.empty())
+ return false;
+
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+ E = Dbgs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ bool RemoveDDI = true;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ else
+ RemoveDDI = false;
+ if (RemoveDDI)
+ DDI->eraseFromParent();
+ }
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 246263026bb4..f02ffd20bca9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -115,7 +115,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", true, false)
-// Publically exposed interface to pass...
+// Publicly exposed interface to pass...
char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
@@ -648,9 +648,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
// the backedge block which correspond to any PHI nodes in the header block.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
- PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be",
- BETerminator);
- NewPN->reserveOperandSpace(BackedgeBlocks.size());
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
if (AA) AA->copyValue(PN, NewPN);
// Loop over the PHI node, moving all entries except the one for the
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 914a439718d4..ed733d393a11 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -84,7 +84,7 @@ char LowerSwitch::ID = 0;
INITIALIZE_PASS(LowerSwitch, "lowerswitch",
"Lower SwitchInst's to branches", false, false)
-// Publically exposed interface to pass...
+// Publicly exposed interface to pass...
char &llvm::LowerSwitchID = LowerSwitch::ID;
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 778885723e66..50c9ae204a4c 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -45,7 +46,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
-#include <map>
#include <queue>
using namespace llvm;
@@ -103,7 +103,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
/// alloca 'V', if any.
static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) {
- if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1))
+ if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
for (Value::use_iterator UI = DebugNode->use_begin(),
E = DebugNode->use_end(); UI != E; ++UI)
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
@@ -273,8 +273,6 @@ namespace {
LargeBlockInfo &LBI);
void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
LargeBlockInfo &LBI);
- void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI);
-
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
@@ -391,7 +389,9 @@ void PromoteMem2Reg::run() {
if (Info.UsingBlocks.empty()) {
// Record debuginfo for the store and remove the declaration's debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
+ if (!DIB)
+ DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
DDI->eraseFromParent();
}
// Remove the (now dead) store and alloca.
@@ -423,8 +423,11 @@ void PromoteMem2Reg::run() {
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->use_back());
// Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = Info.DbgDeclare)
- ConvertDebugDeclareToDebugValue(DDI, SI);
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
SI->eraseFromParent();
LBI.deleteValue(SI);
}
@@ -944,28 +947,6 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
}
}
-// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
-// that has an associated llvm.dbg.decl intrinsic.
-void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
- StoreInst *SI) {
- DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
- return;
-
- if (!DIB)
- DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
- Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
- DIVar, SI);
-
- // Propagate any debug metadata from the store onto the dbg.value.
- DebugLoc SIDL = SI->getDebugLoc();
- if (!SIDL.isUnknown())
- DbgVal->setDebugLoc(SIDL);
- // Otherwise propagate debug metadata from dbg.declare.
- else
- DbgVal->setDebugLoc(DDI->getDebugLoc());
-}
-
// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
// Alloca. Returns true if there wasn't already a phi-node for that variable.
//
@@ -979,12 +960,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
- PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
Allocas[AllocaNo]->getName() + "." + Twine(Version++),
BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
- PN->reserveOperandSpace(getNumPreds(BB));
if (AST && PN->getType()->isPointerTy())
AST->copyValue(PointerAllocaValues[AllocaNo], PN);
@@ -1076,8 +1056,11 @@ NextIteration:
// what value were we writing?
IncomingVals[ai->second] = SI->getOperand(0);
// Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
- ConvertDebugDeclareToDebugValue(DDI, SI);
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
BB->getInstList().erase(SI);
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 3896d9851b26..2860c3e511a6 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ssaupdater"
+#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -20,8 +21,10 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+
using namespace llvm;
typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
@@ -170,8 +173,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
}
// Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
- InsertedPHI->reserveOperandSpace(PredValues.size());
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
// Fill in all the predecessors of the PHI.
for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
@@ -184,6 +187,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
return V;
}
+ // Set DebugLoc.
+ InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB));
+
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
@@ -289,9 +295,8 @@ public:
/// Reserve space for the operands but do not fill them in yet.
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
- &BB->front());
- PHI->reserveOperandSpace(NumPreds);
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
return PHI;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c6708857cb56..18b857308e34 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -37,6 +37,10 @@
#include <map>
using namespace llvm;
+static cl::opt<unsigned>
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
@@ -201,11 +205,20 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
/// which works well enough for us.
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
-/// see if V (which must be an instruction) is cheap to compute and is
-/// non-trapping. If both are true, the instruction is inserted into the set
-/// and true is returned.
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// are non-trapping. If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, CostRemaining is decreased by the cost of
+/// V plus its non-dominating operands. If that cost is greater than
+/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSet<Instruction*, 4> *AggressiveInsts) {
+ SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+ unsigned &CostRemaining) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -232,12 +245,17 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// instructions in the 'if region'.
if (AggressiveInsts == 0) return false;
+ // If we have seen this instruction before, don't count it again.
+ if (AggressiveInsts->count(I)) return true;
+
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
if (!I->isSafeToSpeculativelyExecute())
return false;
+ unsigned Cost = 0;
+
switch (I->getOpcode()) {
default: return false; // Cannot hoist this out safely.
case Instruction::Load:
@@ -246,11 +264,13 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// predecessor.
if (PBB->getFirstNonPHIOrDbg() != I)
return false;
+ Cost = 1;
break;
case Instruction::GetElementPtr:
// GEPs are cheap if all indices are constant.
if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
return false;
+ Cost = 1;
break;
case Instruction::Add:
case Instruction::Sub:
@@ -261,13 +281,26 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
case Instruction::LShr:
case Instruction::AShr:
case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ Cost = 1;
break; // These are all cheap and non-trapping instructions.
+
+ case Instruction::Select:
+ Cost = 2;
+ break;
}
- // Okay, we can only really hoist these out if their operands are not
- // defined in the conditional region.
+ if (Cost > CostRemaining)
+ return false;
+
+ CostRemaining -= Cost;
+
+ // Okay, we can only really hoist these out if their operands do
+ // not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, 0))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
@@ -807,12 +840,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
BasicBlock::iterator BB2_Itr = BB2->begin();
Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = BB2_Itr++;
- if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- !I1->isIdenticalToWhenDefined(I2) ||
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
@@ -835,13 +872,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I2->eraseFromParent();
I1 = BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = BB1_Itr++;
I2 = BB2_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = BB2_Itr++;
- } while (I1->getOpcode() == I2->getOpcode() &&
- I1->isIdenticalToWhenDefined(I2));
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ } while (I1->isIdenticalToWhenDefined(I2));
return true;
@@ -1209,6 +1250,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
@@ -1218,8 +1261,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
continue;
}
- if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1))
return false;
}
@@ -1393,24 +1438,23 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
return true;
}
-/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
-/// and if a predecessor branches to us and one of our successors, fold the
-/// setcc into the predecessor and use logical operations to pick the right
-/// destination.
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
-
+
// Only allow this if the condition is a simple instruction that can be
// executed unconditionally. It must be in the same block as the branch, and
// must be at the front of the block.
BasicBlock::iterator FrontIt = BB->front();
+
// Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(FrontIt))
- ++FrontIt;
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
// Allow a single instruction to be hoisted in addition to the compare
// that feeds the branch. We later ensure that any values that _it_ uses
@@ -1422,21 +1466,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
FrontIt->isSafeToSpeculativelyExecute()) {
BonusInst = &*FrontIt;
++FrontIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
}
-
+
// Only a single bonus inst is allowed.
if (&*FrontIt != Cond)
return false;
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
+
// Ignore dbg intrinsics.
- while(isa<DbgInfoIntrinsic>(CondIt))
- ++CondIt;
- if (&*CondIt != BI) {
- assert (!isa<DbgInfoIntrinsic>(CondIt) && "Hey do not forget debug info!");
+ while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+ if (&*CondIt != BI)
return false;
- }
// Cond is known to be a compare or binary operator. Check to make sure that
// neither operand is a potentially-trapping constant expression.
@@ -1447,13 +1493,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (CE->canTrap())
return false;
-
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
if (TrueDest == BB || FalseDest == BB)
return false;
-
+
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *PredBlock = *PI;
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
@@ -1461,10 +1506,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
- if (PBI == 0 || PBI->isUnconditional() ||
- !SafeToMergeTerminators(BI, PBI))
+ if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
continue;
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond = false;
+
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
// must already have been resolved, so we won't be inhibiting the
@@ -1502,20 +1561,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (!UsedValues.empty()) return false;
}
-
- Instruction::BinaryOps Opc;
- bool InvertPredCond = false;
-
- if (PBI->getSuccessor(0) == TrueDest)
- Opc = Instruction::Or;
- else if (PBI->getSuccessor(1) == FalseDest)
- Opc = Instruction::And;
- else if (PBI->getSuccessor(0) == FalseDest)
- Opc = Instruction::And, InvertPredCond = true;
- else if (PBI->getSuccessor(1) == TrueDest)
- Opc = Instruction::Or, InvertPredCond = true;
- else
- continue;
DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
@@ -1566,6 +1611,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
AddPredecessorToBlock(FalseDest, PredBlock, BB);
PBI->setSuccessor(1, FalseDest);
}
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isa<DbgInfoIntrinsic>(*I))
+ I->clone()->insertBefore(PBI);
+
return true;
}
return false;
@@ -1598,13 +1649,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// in the constant and simplify the block result. Subsequent passes of
// simplifycfg will thread the block.
if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ std::distance(PB, PE),
BI->getCondition()->getName() + ".pr",
BB->begin());
// Okay, we're going to insert the PHI node. Since PBI is not the only
// predecessor, compute the PHI'd conditional value for all of the preds.
// Any predecessor where the condition is not computable we keep symbolic.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
PBI != BI && PBI->isConditional() &&
@@ -1800,6 +1853,26 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
return true;
}
+// SimplifySwitchOnSelect - Replaces
+// (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise. Returns false early if X or Y is not a ConstantInt.
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+ // Both arms of the select must be constant integers; otherwise give up.
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+ ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+ if (!TrueVal || !FalseVal)
+ return false;
+
+ // Fetch the select's condition and the switch successor for each arm's value.
+ Value *Condition = Select->getCondition();
+ BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
+ BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+
+ // Let SimplifyTerminatorOnSelect rewrite the terminator; return its result.
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+}
+
// SimplifyIndirectBrOnSelect - Replaces
// (indirectbr (select cond, blockaddress(@fn, BlockA),
// blockaddress(@fn, BlockB)))
@@ -2148,7 +2221,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
if (LI->isVolatile())
break;
- // Delete this instruction
+ // Delete this instruction (any uses are guaranteed to be dead)
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
BBI->eraseFromParent();
Changed = true;
}
@@ -2189,17 +2264,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If the default value is unreachable, figure out the most popular
// destination and make it the default.
if (SI->getSuccessor(0) == BB) {
- std::map<BasicBlock*, unsigned> Popularity;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- Popularity[SI->getSuccessor(i)]++;
-
+ std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ std::pair<unsigned, unsigned>& entry =
+ Popularity[SI->getSuccessor(i)];
+ if (entry.first == 0) {
+ entry.first = 1;
+ entry.second = i;
+ } else {
+ entry.first++;
+ }
+ }
+
// Find the most popular block.
unsigned MaxPop = 0;
+ unsigned MaxIndex = 0;
BasicBlock *MaxBlock = 0;
- for (std::map<BasicBlock*, unsigned>::iterator
+ for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second > MaxPop) {
- MaxPop = I->second;
+ if (I->second.first > MaxPop ||
+ (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+ MaxPop = I->second.first;
+ MaxIndex = I->second.second;
MaxBlock = I->first;
}
}
@@ -2309,7 +2395,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
return SimplifyCFG(BB) | true;
-
+
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB) | true;
+
// If the block only contains the switch, see if we can fold the block
// away into any preds.
BasicBlock::iterator BBI = BB->begin();
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index ccb8287d7969..46d4adaaa154 100644
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -116,7 +116,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
} else {
// If the function doesn't return void... add a PHI node to the block...
- PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
NewRetBlock->getInstList().push_back(PN);
ReturnInst::Create(F.getContext(), PN, NewRetBlock);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index f5481d31eb8a..a73bf0449813 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -39,7 +39,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
return VM[V] = const_cast<Value*>(V);
// Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
VM[V] = Dummy;
// Check all operands to see if any need to be remapped.
@@ -54,7 +54,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
Value *Op = MD->getOperand(i);
Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
}
- MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts);
Dummy->replaceAllUsesWith(NewMD);
VM[V] = NewMD;
MDNode::deleteTemporary(Dummy);