From d0e4e96dc17a6c1c6de3340842c80f0e187ba349 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Sat, 26 Feb 2011 22:03:50 +0000
Subject: Vendor import of llvm trunk r126547:
 http://llvm.org/svn/llvm-project/llvm/trunk@126547

---
 lib/Analysis/DIBuilder.cpp                         |  154 +--
 lib/Analysis/InstructionSimplify.cpp               |   10 +
 lib/CodeGen/AllocationOrder.h                      |    2 +
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp              |   10 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp              |    9 +-
 lib/CodeGen/BranchFolding.cpp                      |    6 +-
 lib/CodeGen/InlineSpiller.cpp                      |   20 +-
 lib/CodeGen/LowerSubregs.cpp                       |   10 +-
 lib/CodeGen/MachineFunction.cpp                    |   12 +-
 lib/CodeGen/MachineRegisterInfo.cpp                |    9 +-
 lib/CodeGen/RegAllocBase.h                         |   15 +-
 lib/CodeGen/RegAllocBasic.cpp                      |   60 +-
 lib/CodeGen/RegAllocGreedy.cpp                     |  197 ++-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp           |  133 +-
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  |  118 ++
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp           |   49 +-
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp    |   10 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp          |   19 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |   31 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h     |    2 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp      |   55 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp        |   26 +-
 lib/CodeGen/SplitKit.cpp                           |   18 +-
 lib/CodeGen/SplitKit.h                             |    7 +
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp       |   30 +-
 lib/CodeGen/VirtRegRewriter.cpp                    |    6 +-
 lib/MC/ELFObjectWriter.cpp                         |   10 +-
 lib/MC/MCAsmInfo.cpp                               |    1 +
 lib/MC/MCAsmInfoDarwin.cpp                         |    1 +
 lib/MC/MCDisassembler/EDOperand.cpp                |   20 +-
 lib/MC/MCDisassembler/EDToken.cpp                  |    4 +
 lib/MC/MCObjectStreamer.cpp                        |   18 +-
 lib/MC/MCParser/AsmParser.cpp                      |    2 +
 lib/MC/MCParser/ELFAsmParser.cpp                   |   12 +-
 lib/MC/MCParser/MCAsmParserExtension.cpp           |    3 +-
 lib/MC/MCSectionMachO.cpp                          |   17 +-
 lib/MC/MCStreamer.cpp                              |    4 +-
 lib/Support/APInt.cpp                              |    2 +-
 lib/Target/ARM/ARMBaseInstrInfo.h                  |    3 +-
 lib/Target/ARM/ARMFastISel.cpp                     |   38 +-
 lib/Target/ARM/ARMFrameLowering.cpp                |   16 +-
 lib/Target/ARM/ARMHazardRecognizer.cpp             |   17 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp                 |   15 +
 lib/Target/ARM/ARMISelLowering.cpp                 |  102 +-
 lib/Target/ARM/ARMInstrFormats.td                  |    7 +-
 lib/Target/ARM/ARMInstrInfo.td                     |    4 +-
 lib/Target/ARM/ARMInstrNEON.td                     |   31 +-
 lib/Target/ARM/ARMInstrVFP.td                      |  162 +--
 lib/Target/ARM/ARMSubtarget.cpp                    |    4 +-
 lib/Target/ARM/MLxExpansionPass.cpp                |   20 +-
 lib/Target/ARM/NEONMoveFix.cpp                     |    9 +-
 lib/Target/ARM/Thumb2InstrInfo.cpp                 |    6 +
 lib/Target/Alpha/AlphaISelLowering.cpp             |    1 -
 lib/Target/Alpha/AlphaISelLowering.h               |   14 +-
 lib/Target/Blackfin/BlackfinISelLowering.cpp       |    1 -
 lib/Target/Blackfin/BlackfinISelLowering.h         |    1 +
 lib/Target/CellSPU/SPUISelLowering.cpp             |    7 +-
 lib/Target/CellSPU/SPUISelLowering.h               |    6 +-
 lib/Target/MBlaze/MBlazeISelLowering.cpp           |    4 +-
 lib/Target/MSP430/MSP430ISelLowering.cpp           |    6 +-
 lib/Target/MSP430/MSP430ISelLowering.h             |    2 +
 lib/Target/PowerPC/PPCISelLowering.cpp             |   25 +-
 lib/Target/PowerPC/PPCISelLowering.h               |   94 +-
 lib/Target/README.txt                              |   24 +
 lib/Target/Sparc/DelaySlotFiller.cpp               |   31 +
 lib/Target/Sparc/SparcISelLowering.cpp             |   43 +-
 lib/Target/Sparc/SparcISelLowering.h               |    2 +
 lib/Target/Sparc/SparcInstrInfo.td                 |   18 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp         |    3 -
 lib/Target/SystemZ/SystemZISelLowering.h           |    2 +
 lib/Target/X86/AsmParser/X86AsmParser.cpp          |   13 +
 lib/Target/X86/Disassembler/X86Disassembler.cpp    |    8 +-
 .../X86/Disassembler/X86DisassemblerDecoder.h      |    2 +-
 lib/Target/X86/README.txt                          |   82 +-
 lib/Target/X86/X86FastISel.cpp                     |    8 +-
 lib/Target/X86/X86ISelLowering.cpp                 |   50 +-
 lib/Target/X86/X86ISelLowering.h                   |   18 +-
 lib/Target/X86/X86InstrFormats.td                  |    2 +
 lib/Target/X86/X86InstrInfo.cpp                    |    4 +-
 lib/Target/X86/X86InstrInfo.h                      |    4 +
 lib/Target/X86/X86InstrInfo.td                     |    3 +
 lib/Target/X86/X86InstrSystem.td                   |    5 +
 lib/Target/X86/X86MCCodeEmitter.cpp                |    8 +
 lib/Target/X86/X86Subtarget.cpp                    |    7 +-
 lib/Target/X86/X86Subtarget.h                      |    2 +
 lib/Target/XCore/XCoreISelLowering.cpp             |  125 +-
 lib/Target/XCore/XCoreISelLowering.h               |   23 +-
 lib/Target/XCore/XCoreInstrInfo.td                 |   55 +-
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp |   33 +
 lib/Transforms/InstCombine/InstCombineCalls.cpp    |   20 +-
 lib/Transforms/Scalar/LoopDeletion.cpp             |   23 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp       |   18 +-
 lib/Transforms/Scalar/SimplifyLibCalls.cpp         | 1343 ++++++++++----------
 lib/Transforms/Utils/Local.cpp                     |   42 +-
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp   |   13 +-
 lib/Transforms/Utils/SimplifyCFG.cpp               |    5 +
 96 files changed, 2250 insertions(+), 1496 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index c1072df72925..590a9c17a8fa 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -31,9 +31,9 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
 DIBuilder::DIBuilder(Module &m)
   : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
 
-/// CreateCompileUnit - A CompileUnit provides an anchor for all debugging
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
 /// information generated during this instance of compilation.
-void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, 
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, 
                                   StringRef Directory, StringRef Producer, 
                                   bool isOptimized, StringRef Flags, 
                                   unsigned RunTimeVer) {
@@ -53,9 +53,9 @@ void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename,
   TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateFile - Create a file descriptor to hold debugging information
+/// createFile - Create a file descriptor to hold debugging information
 /// for a file.
-DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) {
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
   assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
@@ -66,8 +66,8 @@ DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) {
   return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) {
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
     MDString::get(VMContext, Name),
@@ -76,9 +76,9 @@ DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) {
   return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateBasicType - Create debugging information entry for a basic 
+/// createBasicType - Create debugging information entry for a basic 
 /// type, e.g 'char'.
-DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, 
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, 
                                   uint64_t AlignInBits,
                                   unsigned Encoding) {
   // Basic types are encoded in DIBasicType format. Line number, filename,
@@ -98,9 +98,9 @@ DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateQaulifiedType - Create debugging information entry for a qualified
+/// createQualifiedType - Create debugging information entry for a qualified
 /// type, e.g. 'const int'.
-DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) {
+DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
   // Qualified types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
@@ -117,8 +117,8 @@ DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) {
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreatePointerType - Create debugging information entry for a pointer.
-DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits,
+/// createPointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
                                     uint64_t AlignInBits, StringRef Name) {
   // Pointer types are encoded in DIDerivedType format.
   Value *Elts[] = {
@@ -136,8 +136,8 @@ DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateReferenceType - Create debugging information entry for a reference.
-DIType DIBuilder::CreateReferenceType(DIType RTy) {
+/// createReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::createReferenceType(DIType RTy) {
   // References are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
@@ -154,8 +154,8 @@ DIType DIBuilder::CreateReferenceType(DIType RTy) {
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateTypedef - Create debugging information entry for a typedef.
-DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File,
+/// createTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
                                 unsigned LineNo) {
   // typedefs are encoded in DIDerivedType format.
   assert(Ty.Verify() && "Invalid typedef type!");
@@ -174,8 +174,8 @@ DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateFriend - Create debugging information entry for a 'friend'.
-DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) {
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
   // typedefs are encoded in DIDerivedType format.
   assert(Ty.Verify() && "Invalid type!");
   assert(FriendTy.Verify() && "Invalid friend type!");
@@ -194,9 +194,9 @@ DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) {
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateInheritance - Create debugging information entry to establish
+/// createInheritance - Create debugging information entry to establish
 /// inheritnace relationship between two types.
-DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, 
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, 
                                     uint64_t BaseOffset, unsigned Flags) {
   // TAG_inheritance is encoded in DIDerivedType format.
   Value *Elts[] = {
@@ -214,8 +214,8 @@ DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateMemberType - Create debugging information entry for a member.
-DIType DIBuilder::CreateMemberType(StringRef Name, 
+/// createMemberType - Create debugging information entry for a member.
+DIType DIBuilder::createMemberType(StringRef Name, 
                                    DIFile File, unsigned LineNumber, 
                                    uint64_t SizeInBits, uint64_t AlignInBits,
                                    uint64_t OffsetInBits, unsigned Flags, 
@@ -236,8 +236,8 @@ DIType DIBuilder::CreateMemberType(StringRef Name,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateClassType - Create debugging information entry for a class.
-DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, 
+/// createClassType - Create debugging information entry for a class.
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, 
                                   DIFile File, unsigned LineNumber, 
                                   uint64_t SizeInBits, uint64_t AlignInBits,
                                   uint64_t OffsetInBits, unsigned Flags,
@@ -263,10 +263,10 @@ DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateTemplateTypeParameter - Create debugging information for template
+/// createTemplateTypeParameter - Create debugging information for template
 /// type parameter.
 DITemplateTypeParameter 
-DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
                                        DIType Ty, MDNode *File, unsigned LineNo,
                                        unsigned ColumnNo) {
   Value *Elts[] = {
@@ -282,10 +282,10 @@ DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name,
                                              array_lengthof(Elts)));
 }
 
-/// CreateTemplateValueParameter - Create debugging information for template
+/// createTemplateValueParameter - Create debugging information for template
 /// value parameter.
 DITemplateValueParameter 
-DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name,
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
                                         DIType Ty, uint64_t Val,
                                         MDNode *File, unsigned LineNo,
                                         unsigned ColumnNo) {
@@ -303,8 +303,8 @@ DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name,
                                               array_lengthof(Elts)));
 }
 
-/// CreateStructType - Create debugging information entry for a struct.
-DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, 
+/// createStructType - Create debugging information entry for a struct.
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, 
                                    DIFile File, unsigned LineNumber, 
                                    uint64_t SizeInBits, uint64_t AlignInBits,
                                    unsigned Flags, DIArray Elements, 
@@ -328,8 +328,8 @@ DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateUnionType - Create debugging information entry for an union.
-DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, 
+/// createUnionType - Create debugging information entry for a union.
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, 
                                   DIFile File,
                                   unsigned LineNumber, uint64_t SizeInBits,
                                   uint64_t AlignInBits, unsigned Flags,
@@ -353,8 +353,8 @@ DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateSubroutineType - Create subroutine type.
-DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) {
+/// createSubroutineType - Create subroutine type.
+DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
   // TAG_subroutine_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
@@ -374,9 +374,9 @@ DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) {
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateEnumerationType - Create debugging information entry for an 
+/// createEnumerationType - Create debugging information entry for an 
 /// enumeration.
-DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, 
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, 
                                         DIFile File, unsigned LineNumber, 
                                         uint64_t SizeInBits, 
                                         uint64_t AlignInBits, DIArray Elements) {
@@ -402,8 +402,8 @@ DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name,
   return DIType(Node);
 }
 
-/// CreateArrayType - Create debugging information entry for an array.
-DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, 
+/// createArrayType - Create debugging information entry for an array.
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, 
                                   DIType Ty, DIArray Subscripts) {
   // TAG_array_type is encoded in DICompositeType format.
   Value *Elts[] = {
@@ -424,8 +424,8 @@ DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateVectorType - Create debugging information entry for a vector.
-DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, 
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, 
                                    DIType Ty, DIArray Subscripts) {
   // TAG_vector_type is encoded in DICompositeType format.
   Value *Elts[] = {
@@ -446,8 +446,8 @@ DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits,
   return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// CreateArtificialType - Create a new DIType with "artificial" flag set.
-DIType DIBuilder::CreateArtificialType(DIType Ty) {
+/// createArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
   if (Ty.isArtificial())
     return Ty;
 
@@ -470,24 +470,24 @@ DIType DIBuilder::CreateArtificialType(DIType Ty) {
   return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
 }
 
-/// RetainType - Retain DIType in a module even if it is not referenced 
+/// retainType - Retain DIType in a module even if it is not referenced 
 /// through debug info anchors.
-void DIBuilder::RetainType(DIType T) {
+void DIBuilder::retainType(DIType T) {
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
   NMD->addOperand(T);
 }
 
-/// CreateUnspecifiedParameter - Create unspeicified type descriptor
+/// createUnspecifiedParameter - Create unspecified type descriptor
 /// for the subroutine type.
-DIDescriptor DIBuilder::CreateUnspecifiedParameter() {
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
   Value *Elts[] = { 
     GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) 
   };
   return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
 }
 
-/// CreateTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::CreateTemporaryType() {
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType() {
   // Give the temporary MDNode a tag. It doesn't matter what tag we
   // use here as long as DIType accepts it.
   Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
@@ -495,8 +495,8 @@ DIType DIBuilder::CreateTemporaryType() {
   return DIType(Node);
 }
 
-/// CreateTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::CreateTemporaryType(DIFile F) {
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType(DIFile F) {
   // Give the temporary MDNode a tag. It doesn't matter what tag we
   // use here as long as DIType accepts it.
   Value *Elts[] = {
@@ -509,8 +509,8 @@ DIType DIBuilder::CreateTemporaryType(DIFile F) {
   return DIType(Node);
 }
 
-/// GetOrCreateArray - Get a DIArray, create one if required.
-DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements) {
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) {
   if (NumElements == 0) {
     Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
     return DIArray(MDNode::get(VMContext, &Null, 1));
@@ -518,9 +518,9 @@ DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements
   return DIArray(MDNode::get(VMContext, Elements, NumElements));
 }
 
-/// GetOrCreateSubrange - Create a descriptor for a value range.  This
+/// getOrCreateSubrange - Create a descriptor for a value range.  This
 /// implicitly uniques the values returned.
-DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
     ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
@@ -530,9 +530,9 @@ DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
   return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
 }
 
-/// CreateGlobalVariable - Create a new descriptor for the specified global.
+/// createGlobalVariable - Create a new descriptor for the specified global.
 DIGlobalVariable DIBuilder::
-CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, 
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, 
                      DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_variable),
@@ -555,10 +555,10 @@ CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
   return DIGlobalVariable(Node);
 }
 
-/// CreateStaticVariable - Create a new descriptor for the specified static
+/// createStaticVariable - Create a new descriptor for the specified static
 /// variable.
 DIGlobalVariable DIBuilder::
-CreateStaticVariable(DIDescriptor Context, StringRef Name, 
+createStaticVariable(DIDescriptor Context, StringRef Name, 
                      StringRef LinkageName, DIFile F, unsigned LineNumber, 
                      DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
   Value *Elts[] = {
@@ -582,8 +582,8 @@ CreateStaticVariable(DIDescriptor Context, StringRef Name,
   return DIGlobalVariable(Node);
 }
 
-/// CreateVariable - Create a new descriptor for the specified variable.
-DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope,
+/// createLocalVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
                                           StringRef Name, DIFile File,
                                           unsigned LineNo, DIType Ty, 
                                           bool AlwaysPreserve, unsigned Flags) {
@@ -614,9 +614,9 @@ DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope,
   return DIVariable(Node);
 }
 
-/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// createComplexVariable - Create a new descriptor for the specified variable
 /// which has a complex address expression for its address.
-DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope,
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
                                             StringRef Name, DIFile F,
                                             unsigned LineNo,
                                             DIType Ty, Value *const *Addr,
@@ -633,8 +633,8 @@ DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope,
   return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
 }
 
-/// CreateFunction - Create a new descriptor for the specified function.
-DISubprogram DIBuilder::CreateFunction(DIDescriptor Context,
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
                                        StringRef Name,
                                        StringRef LinkageName,
                                        DIFile File, unsigned LineNo,
@@ -670,8 +670,8 @@ DISubprogram DIBuilder::CreateFunction(DIDescriptor Context,
   return DISubprogram(Node);
 }
 
-/// CreateMethod - Create a new descriptor for the specified C++ method.
-DISubprogram DIBuilder::CreateMethod(DIDescriptor Context,
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
                                      StringRef Name,
                                      StringRef LinkageName,
                                      DIFile F,
@@ -710,9 +710,9 @@ DISubprogram DIBuilder::CreateMethod(DIDescriptor Context,
   return DISubprogram(Node);
 }
 
-/// CreateNameSpace - This creates new descriptor for a namespace
+/// createNameSpace - This creates new descriptor for a namespace
 /// with the specified parent scope.
-DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name,
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
                                        DIFile File, unsigned LineNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
@@ -724,7 +724,7 @@ DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name,
   return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File,
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
                                              unsigned Line, unsigned Col) {
   // Defeat MDNode uniqing for lexical blocks by using unique id.
   static unsigned int unique_id = 0;
@@ -739,8 +739,8 @@ DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File,
   return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
 }
 
-/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
                                       Instruction *InsertBefore) {
   assert(Storage && "no storage passed to dbg.declare");
   assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
@@ -751,8 +751,8 @@ Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
 }
 
-/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
                                       BasicBlock *InsertAtEnd) {
   assert(Storage && "no storage passed to dbg.declare");
   assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
@@ -769,8 +769,8 @@ Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
     return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
 }
 
-/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable VarInfo,
                                                 Instruction *InsertBefore) {
   assert(V && "no value passed to dbg.value");
@@ -784,8 +784,8 @@ Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
 }
 
-/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable VarInfo,
                                                 BasicBlock *InsertAtEnd) {
   assert(V && "no value passed to dbg.value");
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index a2f9862383fd..982dacb50bfc 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1161,6 +1161,16 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
       (A == Op0 || B == Op0))
     return Op0;
 
+  // ~(A & ?) | A = -1
+  if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+      (A == Op1 || B == Op1))
+    return Constant::getAllOnesValue(Op1->getType());
+
+  // A | ~(A & ?) = -1
+  if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+      (A == Op0 || B == Op0))
+    return Constant::getAllOnesValue(Op0->getType());
+
   // Try some generic simplifications for associative operations.
   if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
                                           MaxRecurse))
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 3db4b6925fca..61fd8f881a8c 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -47,6 +47,8 @@ public:
   /// rewind - Start over from the beginning.
   void rewind() { Pos = 0; }
 
+  /// isHint - Return true if PhysReg is a preferred register.
+  bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 43e8990a9da1..9cb882e6a1bb 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -764,7 +764,7 @@ bool AsmPrinter::doFinalization(Module &M) {
       continue;
 
     MCSymbol *Name = Mang->getSymbol(&F);
-    EmitVisibility(Name, V);
+    EmitVisibility(Name, V, false);
   }
 
   // Finalize debug and EH information.
@@ -1820,13 +1820,17 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
   }
 }
 
-void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const {
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+                                bool IsDefinition) const {
   MCSymbolAttr Attr = MCSA_Invalid;
   
   switch (Visibility) {
   default: break;
   case GlobalValue::HiddenVisibility:
-    Attr = MAI->getHiddenVisibilityAttr();
+    if (IsDefinition)
+      Attr = MAI->getHiddenVisibilityAttr();
+    else
+      Attr = MAI->getHiddenDeclarationVisibilityAttr();
     break;
   case GlobalValue::ProtectedVisibility:
     Attr = MAI->getProtectedVisibilityAttr();
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5106d5778c29..780fa405ef51 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
@@ -644,12 +645,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
   for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) {
     uint64_t Element = DV->getAddrElement(i);
 
-    if (Element == DIFactory::OpPlus) {
+    if (Element == DIBuilder::OpPlus) {
       addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
       addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
-    } else if (Element == DIFactory::OpDeref) {
+    } else if (Element == DIBuilder::OpDeref) {
       addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-    } else llvm_unreachable("unknown DIFactory Opcode");
+    } else llvm_unreachable("unknown DIBuilder Opcode");
   }
 
   // Now attach the location information to the DIE.
@@ -1894,7 +1895,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
   addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
             DIUnit.getProducer());
-  addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+  addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
           DIUnit.getLanguage());
   addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
   // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index cb81aa3c88ce..78a87431feaa 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -501,10 +501,11 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
                               MachineBasicBlock *SuccBB,
                               MachineBasicBlock *PredBB) {
   CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
-  MachineFunction *MF = MBB1->getParent();
-
   if (CommonTailLen == 0)
     return false;
+  DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+               << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+               << '\n');
 
   // It's almost always profitable to merge any number of non-terminator
   // instructions with the block that falls through into the common successor.
@@ -541,6 +542,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
   // we don't have to split a block.  At worst we will be introducing 1 new
   // branch instruction, which is likely to be smaller than the 2
   // instructions that would be deleted in the merge.
+  MachineFunction *MF = MBB1->getParent();
   if (EffectiveTailLen >= 2 &&
       MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
       (I1 == MBB1->begin() || I2 == MBB2->begin()))
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index a1bd972d38e2..38e6c8590269 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -102,8 +102,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
 }
 }
 
-/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of
-/// reloading it.
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
 bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
   SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
   VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx);
@@ -346,7 +345,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
          && "Trying to spill a stack slot.");
   DEBUG(dbgs() << "Inline spilling "
                << mri_.getRegClass(edit.getReg())->getName()
-               << ':' << edit.getParent() << "\n");
+               << ':' << edit.getParent() << "\nFrom original "
+               << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n');
   assert(edit.getParent().isSpillable() &&
          "Attempting to spill already spilled value.");
 
@@ -357,12 +357,20 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
     return;
 
   rc_ = mri_.getRegClass(edit.getReg());
-  stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg());
+
+  // Share a stack slot among all descendants of Orig.
+  unsigned Orig = vrm_.getOriginal(edit.getReg());
+  stackSlot_ = vrm_.getStackSlot(Orig);
+  if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
+    stackSlot_ = vrm_.assignVirt2StackSlot(Orig);
+
+  if (Orig != edit.getReg())
+    vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_);
 
   // Update LiveStacks now that we are committed to spilling.
   LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_);
-  assert(stacklvr.empty() && "Just created stack slot not empty");
-  stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator());
+  if (!stacklvr.hasAtLeastOneValue())
+    stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator());
   stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0));
 
   // Iterate over instructions using register.
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index ad1c537c1911..7871ba9c17e4 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -37,7 +37,7 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid
     LowerSubregsInstructionPass() : MachineFunctionPass(ID) {}
-    
+
     const char *getPassName() const {
       return "Subregister lowering instruction pass";
     }
@@ -64,8 +64,8 @@ namespace {
   char LowerSubregsInstructionPass::ID = 0;
 }
 
-FunctionPass *llvm::createLowerSubregsPass() { 
-  return new LowerSubregsInstructionPass(); 
+FunctionPass *llvm::createLowerSubregsPass() {
+  return new LowerSubregsInstructionPass();
 }
 
 /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
@@ -192,9 +192,9 @@ bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
 /// copies.
 ///
 bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(dbgs() << "Machine Function\n"  
+  DEBUG(dbgs() << "Machine Function\n"
                << "********** LOWERING SUBREG INSTRS **********\n"
-               << "********** Function: " 
+               << "********** Function: "
                << MF.getFunction()->getName() << '\n');
   TRI = MF.getTarget().getRegisterInfo();
   TII = MF.getTarget().getInstrInfo();
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 85532407ca43..d81e4a1d015f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -396,8 +396,7 @@ void MachineFunction::viewCFGOnly() const
 /// addLiveIn - Add the specified physical register as a live-in value and
 /// create a corresponding virtual register for it.
 unsigned MachineFunction::addLiveIn(unsigned PReg,
-                                    const TargetRegisterClass *RC,
-                                    DebugLoc DL) {
+                                    const TargetRegisterClass *RC) {
   MachineRegisterInfo &MRI = getRegInfo();
   unsigned VReg = MRI.getLiveInVirtReg(PReg);
   if (VReg) {
@@ -406,7 +405,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
   }
   VReg = MRI.createVirtualRegister(RC);
   MRI.addLiveIn(PReg, VReg);
-  MRI.addLiveInLoc(VReg, DL);
   return VReg;
 }
 
@@ -646,6 +644,10 @@ MachineConstantPool::~MachineConstantPool() {
   for (unsigned i = 0, e = Constants.size(); i != e; ++i)
     if (Constants[i].isMachineConstantPoolEntry())
       delete Constants[i].Val.MachineCPVal;
+  for (DenseSet<MachineConstantPoolValue*>::iterator I =
+       MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+       I != E; ++I)
+    delete *I;
 }
 
 /// CanShareConstantPoolEntry - Test whether the given two constants
@@ -723,8 +725,10 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
   //
   // FIXME, this could be made much more efficient for large constant pools.
   int Idx = V->getExistingMachineCPValue(this, Alignment);
-  if (Idx != -1)
+  if (Idx != -1) {
+    MachineCPVsSharingEntries.insert(V);
     return (unsigned)Idx;
+  }
 
   Constants.push_back(MachineConstantPoolEntry(V, Alignment));
   return Constants.size()-1;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index b3fb33736ffc..7244d5f03a90 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -210,15 +210,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
         LiveIns.erase(LiveIns.begin() + i);
         --i; --e;
       } else {
-        DebugLoc DL;
-        // If there is a location for this live in then use it.
-        DenseMap<unsigned, DebugLoc>::iterator DLI = 
-          LiveInLocs.find(LiveIns[i].second);
-        if (DLI != LiveInLocs.end())
-          DL = DLI->second;
-
         // Emit a copy.
-        BuildMI(*EntryMBB, EntryMBB->begin(), DL,
+        BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
                 TII.get(TargetOpcode::COPY), LiveIns[i].second)
           .addReg(LiveIns[i].first);
 
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 8c7e5f53b824..5af0ce79acf7 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -39,7 +39,6 @@
 
 #include "llvm/ADT/OwningPtr.h"
 #include "LiveIntervalUnion.h"
-#include <queue>
 
 namespace llvm {
 
@@ -58,8 +57,8 @@ class LiveVirtRegQueue;
 /// be extended to add interesting heuristics.
 ///
 /// Register allocators must override the selectOrSplit() method to implement
-/// live range splitting. They may also override getPriority() which otherwise
-/// defaults to the spill weight computed by CalculateSpillWeights.
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
 class RegAllocBase {
   LiveIntervalUnion::Allocator UnionAllocator;
 protected:
@@ -120,9 +119,11 @@ protected:
   // Get a temporary reference to a Spiller instance.
   virtual Spiller &spiller() = 0;
 
-  // getPriority - Calculate the allocation priority for VirtReg.
-  // Virtual registers with higher priorities are allocated first.
-  virtual float getPriority(LiveInterval *LI) = 0;
+  /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+  virtual void enqueue(LiveInterval *LI) = 0;
+
+  /// dequeue - Return the next unassigned register, or NULL.
+  virtual LiveInterval *dequeue() = 0;
 
   // A RegAlloc pass should override this to provide the allocation heuristics.
   // Each call must guarantee forward progess by returning an available PhysReg
@@ -170,7 +171,7 @@ public:
   static bool VerifyEnabled;
 
 private:
-  void seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> >&);
+  void seedLiveRegs();
 
   void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
                 SmallVectorImpl<LiveInterval*> &SplitVRegs);
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 045c8db9dadb..6923908a32d9 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/Timer.h"
 
 #include <cstdlib>
+#include <queue>
 
 using namespace llvm;
 
@@ -64,6 +65,14 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
 const char *RegAllocBase::TimerGroupName = "Register Allocation";
 bool RegAllocBase::VerifyEnabled = false;
 
+namespace {
+  struct CompSpillWeight {
+    bool operator()(LiveInterval *A, LiveInterval *B) const {
+      return A->weight < B->weight;
+    }
+  };
+}
+
 namespace {
 /// RABasic provides a minimal implementation of the basic register allocation
 /// algorithm. It prioritizes live virtual registers by spill weight and spills
@@ -82,7 +91,8 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
 
   // state
   std::auto_ptr<Spiller> SpillerInstance;
-
+  std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+                      CompSpillWeight> Queue;
 public:
   RABasic();
 
@@ -100,6 +110,18 @@ public:
 
   virtual float getPriority(LiveInterval *LI) { return LI->weight; }
 
+  virtual void enqueue(LiveInterval *LI) {
+    Queue.push(LI);
+  }
+
+  virtual LiveInterval *dequeue() {
+    if (Queue.empty())
+      return 0;
+    LiveInterval *LI = Queue.top();
+    Queue.pop();
+    return LI;
+  }
+
   virtual unsigned selectOrSplit(LiveInterval &VirtReg,
                                  SmallVectorImpl<LiveInterval*> &SplitVRegs);
 
@@ -227,18 +249,17 @@ void RegAllocBase::releaseMemory() {
   PhysReg2LiveUnion.clear();
 }
 
-// Visit all the live virtual registers. If they are already assigned to a
-// physical register, unify them with the corresponding LiveIntervalUnion,
-// otherwise push them on the priority queue for later assignment.
-void RegAllocBase::
-seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> > &VirtRegQ) {
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
   for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
     unsigned RegNum = I->first;
     LiveInterval &VirtReg = *I->second;
     if (TargetRegisterInfo::isPhysicalRegister(RegNum))
       PhysReg2LiveUnion[RegNum].unify(VirtReg);
     else
-      VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum));
+      enqueue(&VirtReg);
   }
 }
 
@@ -263,38 +284,31 @@ void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
 // Top-level driver to manage the queue of unassigned VirtRegs and call the
 // selectOrSplit implementation.
 void RegAllocBase::allocatePhysRegs() {
-
-  // Push each vreg onto a queue or "precolor" by adding it to a physreg union.
-  std::priority_queue<std::pair<float, unsigned> > VirtRegQ;
-  seedLiveVirtRegs(VirtRegQ);
+  seedLiveRegs();
 
   // Continue assigning vregs one at a time to available physical registers.
-  while (!VirtRegQ.empty()) {
-    // Pop the highest priority vreg.
-    LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second);
-    VirtRegQ.pop();
-
+  while (LiveInterval *VirtReg = dequeue()) {
     // selectOrSplit requests the allocator to return an available physical
     // register if possible and populate a list of new live intervals that
     // result from splitting.
-    DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName()
-                 << ':' << VirtReg << '\n');
+    DEBUG(dbgs() << "\nselectOrSplit "
+                 << MRI->getRegClass(VirtReg->reg)->getName()
+                 << ':' << *VirtReg << '\n');
     typedef SmallVector<LiveInterval*, 4> VirtRegVec;
     VirtRegVec SplitVRegs;
-    unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs);
+    unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
 
     if (AvailablePhysReg)
-      assign(VirtReg, AvailablePhysReg);
+      assign(*VirtReg, AvailablePhysReg);
 
     for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
          I != E; ++I) {
-      LiveInterval* SplitVirtReg = *I;
+      LiveInterval *SplitVirtReg = *I;
       if (SplitVirtReg->empty()) continue;
       DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
       assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
              "expect split value in virtual register");
-      VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg),
-                                   SplitVirtReg->reg));
+      enqueue(SplitVirtReg);
       ++NumNewQueued;
     }
   }
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index c1372cd038cf..406485aaf496 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -43,6 +43,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Timer.h"
 
+#include <queue>
+
 using namespace llvm;
 
 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
@@ -71,6 +73,8 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase {
   // state
   std::auto_ptr<Spiller> SpillerInstance;
   std::auto_ptr<SplitAnalysis> SA;
+  std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+  IndexedMap<unsigned, VirtReg2IndexFunctor> Generation;
 
   // splitting state.
 
@@ -91,13 +95,10 @@ public:
 
   /// RAGreedy analysis usage.
   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
   virtual void releaseMemory();
-
   virtual Spiller &spiller() { return *SpillerInstance; }
-
-  virtual float getPriority(LiveInterval *LI);
-
+  virtual void enqueue(LiveInterval *LI);
+  virtual LiveInterval *dequeue();
   virtual unsigned selectOrSplit(LiveInterval&,
                                  SmallVectorImpl<LiveInterval*>&);
 
@@ -119,9 +120,12 @@ private:
   SlotIndex getPrevMappedIndex(const MachineInstr*);
   void calcPrevSlots();
   unsigned nextSplitPoint(unsigned);
+  bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&);
 
-  unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&,
+  unsigned tryReassign(LiveInterval&, AllocationOrder&,
                               SmallVectorImpl<LiveInterval*>&);
+  unsigned tryEvict(LiveInterval&, AllocationOrder&,
+                    SmallVectorImpl<LiveInterval*>&);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
@@ -183,25 +187,42 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
 
 void RAGreedy::releaseMemory() {
   SpillerInstance.reset(0);
+  Generation.clear();
   RegAllocBase::releaseMemory();
 }
 
-float RAGreedy::getPriority(LiveInterval *LI) {
-  float Priority = LI->weight;
-
-  // Prioritize hinted registers so they are allocated first.
-  std::pair<unsigned, unsigned> Hint;
-  if (Hint.first || Hint.second) {
-    // The hint can be target specific, a virtual register, or a physreg.
-    Priority *= 2;
-
-    // Prefer physreg hints above anything else.
-    if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second))
-      Priority *= 2;
-  }
-  return Priority;
+void RAGreedy::enqueue(LiveInterval *LI) {
+  // Prioritize live ranges by size, assigning larger ranges first.
+  // The queue holds (Prio, Reg) pairs; Prio is derived from the range size.
+  const unsigned Size = LI->getSize();
+  const unsigned Reg = LI->reg;
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "Can only enqueue virtual registers");
+  const unsigned Hint = VRM->getRegAllocPref(Reg);
+  unsigned Prio;
+
+  Generation.grow(Reg);
+  if (++Generation[Reg] == 1)
+    // 1st generation ranges are handled first, long -> short.
+    Prio = (1u << 31) + Size;
+  else
+    // Repeat offenders are handled second, short -> long.
+    Prio = (1u << 30) - Size;
+
+  // Boost ranges that have a physical register hint.
+  if (TargetRegisterInfo::isPhysicalRegister(Hint))
+    Prio |= (1u << 30);
+
+  Queue.push(std::make_pair(Prio, Reg));
 }
 
+LiveInterval *RAGreedy::dequeue() {
+  if (Queue.empty())
+    return 0;
+  LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+  Queue.pop();
+  return LI;
+}
 
 //===----------------------------------------------------------------------===//
 //                         Register Reassignment
@@ -230,8 +251,7 @@ LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg,
     if (Q.checkInterference()) {
       if (Interference)
         return 0;
-      Q.collectInterferingVRegs(1);
-      if (!Q.seenAllInterferences())
+      if (Q.collectInterferingVRegs(2) > 1)
         return 0;
       Interference = Q.interferingVRegs().front();
     }
@@ -276,21 +296,14 @@ bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg,
   return false;
 }
 
-/// tryReassignOrEvict - Try to reassign a single interferences to a different
-/// physreg, or evict a single interference with a lower spill weight.
+/// tryReassign - Try to reassign a single interference to a different physreg.
 /// @param  VirtReg Currently unassigned virtual register.
 /// @param  Order   Physregs to try.
 /// @return         Physreg to assign VirtReg, or 0.
-unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg,
-                                      AllocationOrder &Order,
-                                      SmallVectorImpl<LiveInterval*> &NewVRegs){
+unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order,
+                               SmallVectorImpl<LiveInterval*> &NewVRegs){
   NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled);
 
-  // Keep track of the lightest single interference seen so far.
-  float BestWeight = VirtReg.weight;
-  LiveInterval *BestVirt = 0;
-  unsigned BestPhys = 0;
-
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
     LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg);
@@ -300,25 +313,92 @@ unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg,
       continue;
     if (reassignVReg(*InterferingVReg, PhysReg))
       return PhysReg;
+  }
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted; on success MaxWeight is the maximal spill weight.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+                                    unsigned Size, float &MaxWeight) {
+  float Weight = 0;
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    // If there are 10 or more interferences, chances are one is smaller.
+    if (Q.collectInterferingVRegs(10) >= 10)
+      return false;
 
-    // Cannot reassign, is this an eviction candidate?
-    if (InterferingVReg->weight < BestWeight) {
-      BestVirt = InterferingVReg;
-      BestPhys = PhysReg;
-      BestWeight = InterferingVReg->weight;
+    // Check if any interfering live range is not larger than Size.
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i];
+      if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
+        return false;
+      if (Intf->getSize() <= Size)
+        return false;
+      Weight = std::max(Weight, Intf->weight);
     }
   }
+  MaxWeight = Weight;
+  return true;
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param  VirtReg Currently unassigned virtual register.
+/// @param  Order   Physregs to try.
+/// @return         Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+                            AllocationOrder &Order,
+                            SmallVectorImpl<LiveInterval*> &NewVRegs){
+  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+  // We can only evict interference if all interfering registers are virtual and
+  // longer than VirtReg.
+  const unsigned Size = VirtReg.getSize();
+
+  // Keep track of the candidate with the lightest max interference so far.
+  float BestWeight = 0;
+  unsigned BestPhys = 0;
 
-  // Nothing reassigned, can we evict a lighter single interference?
-  if (BestVirt) {
-    DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n');
-    unassign(*BestVirt, VRM->getPhys(BestVirt->reg));
-    ++NumEvicted;
-    NewVRegs.push_back(BestVirt);
-    return BestPhys;
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    float Weight = 0;
+    if (!canEvictInterference(VirtReg, PhysReg, Size, Weight))
+      continue;
+
+    // This is an eviction candidate.
+    DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = "
+                 << Weight << '\n');
+    if (BestPhys && Weight >= BestWeight)
+      continue;
+
+    // Best so far.
+    BestPhys = PhysReg;
+    BestWeight = Weight;
+    // Stop if the hint can be used.
+    if (Order.isHint(PhysReg))
+      break;
   }
 
-  return 0;
+  if (!BestPhys)
+    return 0;
+
+  DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n");
+  for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i];
+      unassign(*Intf, VRM->getPhys(Intf->reg));
+      ++NumEvicted;
+      NewVRegs.push_back(Intf);
+    }
+  }
+  return BestPhys;
 }
 
 
@@ -426,8 +506,13 @@ float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) {
         if (!IntI.valid())
           break;
         // Not live in, but before the first use.
-        if (IntI.start() < BI.FirstUse)
+        if (IntI.start() < BI.FirstUse) {
           BC.Entry = SpillPlacement::PrefSpill;
+          // If the block contains a kill from an earlier split, never split
+          // again in the same block.
+          if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill))
+            BC.Entry = SpillPlacement::MustSpill;
+        }
       }
 
       // Does interference overlap the uses in the entry segment
@@ -458,8 +543,12 @@ float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) {
           IntI.advanceTo(BI.LastUse);
           if (!IntI.valid())
             break;
-          if (IntI.start() < Stop)
+          if (IntI.start() < Stop) {
             BC.Exit = SpillPlacement::PrefSpill;
+            // Avoid splitting twice in the same block.
+            if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def))
+              BC.Exit = SpillPlacement::MustSpill;
+          }
         }
       }
     }
@@ -1221,12 +1310,22 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
       return PhysReg;
   }
 
-  // Try to reassign interferences.
-  if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs))
+  if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs))
+    return PhysReg;
+
+  if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
     return PhysReg;
 
   assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
 
+  // The first time we see a live range, don't try to split or spill.
+  // Wait until the second time, when all smaller ranges have been allocated.
+  // This gives a better picture of the interference to split around.
+  if (Generation[VirtReg.reg] == 1) {
+    NewVRegs.push_back(&VirtReg);
+    return 0;
+  }
+
   // Try splitting VirtReg or interferences.
   unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
   if (PhysReg || !NewVRegs.empty())
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 90356021f602..9cc70a30927d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -279,8 +279,8 @@ namespace {
 
     /// getShiftAmountTy - Returns a type large enough to hold any valid
     /// shift amount - before type legalization these can be huge.
-    EVT getShiftAmountTy() {
-      return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+    EVT getShiftAmountTy(EVT LHSTy) {
+      return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
     }
 
     /// isTypeLegal - This method returns true if we are running before type
@@ -670,7 +670,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
     EVT MemVT = LD->getMemoryVT();
     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
-      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 
+      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                   : ISD::EXTLOAD)
       : LD->getExtensionType();
     Replace = true;
@@ -894,7 +894,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
     LoadSDNode *LD = cast<LoadSDNode>(N);
     EVT MemVT = LD->getMemoryVT();
     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
-      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 
+      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                   : ISD::EXTLOAD)
       : LD->getExtensionType();
     SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
@@ -1521,7 +1521,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
 // Since it may not be valid to emit a fold to zero for vector initializers
 // check if we can before folding.
 static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
-                             SelectionDAG &DAG, bool LegalOperations) {                            
+                             SelectionDAG &DAG, bool LegalOperations) {
   if (!VT.isVector()) {
     return DAG.getConstant(0, VT);
   } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
@@ -1647,7 +1647,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   if (N1C && N1C->getAPIntValue().isPowerOf2())
     return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                        DAG.getConstant(N1C->getAPIntValue().logBase2(),
-                                       getShiftAmountTy()));
+                                       getShiftAmountTy(N0.getValueType())));
   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
   if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
     unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
@@ -1656,7 +1656,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
     return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                        DAG.getConstant(0, VT),
                        DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
-                            DAG.getConstant(Log2Val, getShiftAmountTy())));
+                            DAG.getConstant(Log2Val,
+                                      getShiftAmountTy(N0.getValueType()))));
   }
   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
   if (N1C && N0.getOpcode() == ISD::SHL &&
@@ -1753,18 +1754,18 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
     // Splat the sign bit into the register
     SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                               DAG.getConstant(VT.getSizeInBits()-1,
-                                              getShiftAmountTy()));
+                                       getShiftAmountTy(N0.getValueType())));
     AddToWorkList(SGN.getNode());
 
     // Add (N0 < 0) ? abs2 - 1 : 0;
     SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
                               DAG.getConstant(VT.getSizeInBits() - lg2,
-                                              getShiftAmountTy()));
+                                       getShiftAmountTy(SGN.getValueType())));
     SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
     AddToWorkList(SRL.getNode());
     AddToWorkList(ADD.getNode());    // Divide by pow2
     SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
-                              DAG.getConstant(lg2, getShiftAmountTy()));
+                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
 
     // If we're dividing by a positive value, we're done.  Otherwise, we must
     // negate the result.
@@ -1814,7 +1815,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   if (N1C && N1C->getAPIntValue().isPowerOf2())
     return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                        DAG.getConstant(N1C->getAPIntValue().logBase2(),
-                                       getShiftAmountTy()));
+                                       getShiftAmountTy(N0.getValueType())));
   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
   if (N1.getOpcode() == ISD::SHL) {
     if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
@@ -1955,7 +1956,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
   if (N1C && N1C->getAPIntValue() == 1)
     return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
-                                       getShiftAmountTy()));
+                                       getShiftAmountTy(N0.getValueType())));
   // fold (mulhs x, undef) -> 0
   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
     return DAG.getConstant(0, VT);
@@ -1971,11 +1972,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
-                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
     }
   }
-  
+
   return SDValue();
 }
 
@@ -2007,11 +2008,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
-                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
     }
   }
-  
+
   return SDValue();
 }
 
@@ -2090,14 +2091,14 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
       // Compute the high part as N1.
       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
-                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
       // Compute the low part as N0.
       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
       return CombineTo(N, Lo, Hi);
     }
   }
-  
+
   return SDValue();
 }
 
@@ -2107,7 +2108,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
 
   EVT VT = N->getValueType(0);
   DebugLoc DL = N->getDebugLoc();
-  
+
   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   // plus a shift.
   if (VT.isSimple() && !VT.isVector()) {
@@ -2120,14 +2121,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
       // Compute the high part as N1.
       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
-                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
       // Compute the low part as N0.
       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
       return CombineTo(N, Lo, Hi);
     }
   }
-  
+
   return SDValue();
 }
 
@@ -3004,7 +3005,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
               N0.getOpcode() == ISD::SIGN_EXTEND) &&
       N0.getOperand(0).getOpcode() == ISD::SHL &&
       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 = 
+    uint64_t c1 =
       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
     uint64_t c2 = N1C->getZExtValue();
     EVT InnerShiftVT = N0.getOperand(0).getValueType();
@@ -3133,7 +3134,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
           TLI.isTruncateFree(VT, TruncVT)) {
 
-          SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+          SDValue Amt = DAG.getConstant(ShiftAmt,
+              getShiftAmountTy(N0.getOperand(0).getValueType()));
           SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
                                       N0.getOperand(0), Amt);
           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
@@ -3180,7 +3182,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
         LargeShiftAmt->getZExtValue()) {
       SDValue Amt =
         DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
-                        getShiftAmountTy());
+              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
       SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
                                 N0.getOperand(0).getOperand(0), Amt);
       return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
@@ -3245,7 +3247,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
       N0.getOperand(0).getOpcode() == ISD::SRL &&
       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 = 
+    uint64_t c1 =
       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
     uint64_t c2 = N1C->getZExtValue();
     EVT InnerShiftVT = N0.getOperand(0).getValueType();
@@ -3256,7 +3258,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       if (c1 + c2 >= InnerShiftSize)
         return DAG.getConstant(0, VT);
       return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
-                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, 
+                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
                                      N0.getOperand(0)->getOperand(0),
                                      DAG.getConstant(c1 + c2, ShiftCountVT)));
     }
@@ -3320,7 +3322,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
 
       if (ShAmt) {
         Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
-                         DAG.getConstant(ShAmt, getShiftAmountTy()));
+                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
         AddToWorkList(Op.getNode());
       }
 
@@ -3685,7 +3687,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   }
 
   // fold (sext (load x)) -> (sext (truncate (sextload x)))
-  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+  // None of the supported targets knows how to load and sign extend a vector
+  // in one instruction.  We only perform this transformation on scalars.
+  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
     bool DoXform = true;
@@ -3887,7 +3891,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   }
 
   // fold (zext (load x)) -> (zext (truncate (zextload x)))
-  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+  // None of the supported targets knows how to perform load and vector_zext
+  // in one instruction.  We only perform this transformation on scalar zext.
+  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
     bool DoXform = true;
@@ -4021,11 +4027,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     }
 
     DebugLoc DL = N->getDebugLoc();
-    
-    // Ensure that the shift amount is wide enough for the shifted value. 
+
+    // Ensure that the shift amount is wide enough for the shifted value.
     if (VT.getSizeInBits() >= 256)
       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
-    
+
     return DAG.getNode(N0.getOpcode(), DL, VT,
                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                        ShAmt);
@@ -4094,7 +4100,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   }
 
   // fold (aext (load x)) -> (aext (truncate (extload x)))
-  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+  // None of the supported targets knows how to load and any_ext a vector
+  // in one instruction.  We only perform this transformation on scalars.
+  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
     bool DoXform = true;
@@ -4272,12 +4280,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     return SDValue();
 
   unsigned EVTBits = ExtVT.getSizeInBits();
-  
+
   // Do not generate loads of non-round integer types since these can
   // be expensive (and would be wrong if the type is not byte sized).
   if (!ExtVT.isRound())
     return SDValue();
-  
+
   unsigned ShAmt = 0;
   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
@@ -4292,7 +4300,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
 
       // At this point, we must have a load or else we can't do the transform.
       if (!isa<LoadSDNode>(N0)) return SDValue();
-      
+
       // If the shift amount is larger than the input type then we're not
       // accessing any of the loaded bytes.  If the load was a zextload/extload
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
@@ -4313,18 +4321,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
       N0 = N0.getOperand(0);
     }
   }
-  
+
   // If we haven't found a load, we can't narrow it.  Don't transform one with
   // multiple uses, this would require adding a new load.
   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
       // Don't change the width of a volatile load.
       cast<LoadSDNode>(N0)->isVolatile())
     return SDValue();
-  
+
   // Verify that we are actually reducing a load width here.
   if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
     return SDValue();
-  
+
   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   EVT PtrType = N0.getOperand(1).getValueType();
 
@@ -4362,7 +4370,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   // Shift the result left, if we've swallowed a left shift.
   SDValue Result = Load;
   if (ShLeftAmt != 0) {
-    EVT ShImmTy = getShiftAmountTy();
+    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
       ShImmTy = VT;
     Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
@@ -4504,14 +4512,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   }
 
   // See if we can simplify the input to this truncate through knowledge that
-  // only the low bits are being used.  For example "trunc (or (shl x, 8), y)"
-  // -> trunc y
-  SDValue Shorter =
-    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
-                                             VT.getSizeInBits()));
-  if (Shorter.getNode())
-    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
-
+  // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> "trunc y".
+  // Currently we only perform this optimization on scalars because vectors
+  // may have different active low bits.
+  if (!VT.isVector()) {
+    SDValue Shorter =
+      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+                                               VT.getSizeInBits()));
+    if (Shorter.getNode())
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+  }
   // fold (truncate (load x)) -> (smaller load x)
   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -5975,7 +5986,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   // shifted by ByteShift and truncated down to NumBytes.
   if (ByteShift)
     IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
-                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+                       DAG.getConstant(ByteShift*8,
+                                    DC->getShiftAmountTy(IVal.getValueType())));
 
   // Figure out the offset for the store and the alignment of the access.
   unsigned StOffset;
@@ -6390,7 +6402,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
 
   EVT VT = InVec.getValueType();
 
-  // If we can't generate a legal BUILD_VECTOR, exit 
+  // If we can't generate a legal BUILD_VECTOR, exit
   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
     return SDValue();
 
@@ -7098,7 +7110,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
         unsigned ShCtV = N2C->getAPIntValue().logBase2();
         ShCtV = XType.getSizeInBits()-ShCtV-1;
-        SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+        SDValue ShCt = DAG.getConstant(ShCtV,
+                                       getShiftAmountTy(N0.getValueType()));
         SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
                                     XType, N0, ShCt);
         AddToWorkList(Shift.getNode());
@@ -7114,7 +7127,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
                                   XType, N0,
                                   DAG.getConstant(XType.getSizeInBits()-1,
-                                                  getShiftAmountTy()));
+                                         getShiftAmountTy(N0.getValueType())));
       AddToWorkList(Shift.getNode());
 
       if (XType.bitsGT(AType)) {
@@ -7142,13 +7155,15 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       // Shift the tested bit over the sign bit.
       APInt AndMask = ConstAndRHS->getAPIntValue();
       SDValue ShlAmt =
-        DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy());
+        DAG.getConstant(AndMask.countLeadingZeros(),
+                        getShiftAmountTy(AndLHS.getValueType()));
       SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
 
       // Now arithmetic right shift it all the way over, so the result is either
       // all-ones, or zero.
       SDValue ShrAmt =
-        DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy());
+        DAG.getConstant(AndMask.getBitWidth()-1,
+                        getShiftAmountTy(Shl.getValueType()));
       SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
 
       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
@@ -7192,7 +7207,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
     // shl setcc result by log2 n2c
     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
-                                       getShiftAmountTy()));
+                                       getShiftAmountTy(Temp.getValueType())));
   }
 
   // Check to see if this is the equivalent of setcc
@@ -7215,7 +7230,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
       return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
                          DAG.getConstant(Log2_32(XType.getSizeInBits()),
-                                         getShiftAmountTy()));
+                                       getShiftAmountTy(Ctlz.getValueType())));
     }
     // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
     if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
@@ -7225,13 +7240,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       return DAG.getNode(ISD::SRL, DL, XType,
                          DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
                          DAG.getConstant(XType.getSizeInBits()-1,
-                                         getShiftAmountTy()));
+                                         getShiftAmountTy(XType)));
     }
     // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
     if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
       SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
-                                                 getShiftAmountTy()));
+                                         getShiftAmountTy(N0.getValueType())));
       return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
     }
   }
@@ -7258,7 +7273,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
                                   N0,
                                   DAG.getConstant(XType.getSizeInBits()-1,
-                                                  getShiftAmountTy()));
+                                         getShiftAmountTy(N0.getValueType())));
       SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
                                 XType, N0, Shift);
       AddToWorkList(Shift.getNode());
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 98582ba99f14..2ae3286829dd 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -219,6 +219,7 @@ void FunctionLoweringInfo::clear() {
   CatchInfoFound.clear();
 #endif
   LiveOutRegInfo.clear();
+  VisitedBBs.clear();
   ArgDbgValues.clear();
   ByValArgFrameIndexMap.clear();
   RegFixups.clear();
@@ -254,6 +255,123 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
   return FirstReg;
 }
 
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if Reg
+/// is out of bounds or its LiveOutInfo is not valid (e.g. an invalid PHI). If
+/// the stored masks are narrower than BitWidth, the stored entry is widened in
+/// place: masks are zero-extended and NumSignBits is reset to 1. BitWidth must
+/// be no smaller than the LiveOutInfo's existing bit width (not checked here).
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+  if (!LiveOutRegInfo.inBounds(Reg))
+    return NULL;
+
+  LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+  if (!LOI->IsValid)
+    return NULL;
+
+  if (BitWidth > LOI->KnownZero.getBitWidth()) {
+    LOI->NumSignBits = 1;
+    LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+    LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+  }
+
+  return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands: seeded from operand 0,
+/// then intersected with every other operand. Marks it invalid if a source
+/// register is not virtual or has no valid LiveOutInfo.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+  const Type *Ty = PN->getType();
+  if (!Ty->isIntegerTy() || Ty->isVectorTy())
+    return;
+
+  SmallVector<EVT, 1> ValueVTs;
+  ComputeValueVTs(TLI, Ty, ValueVTs);
+  assert(ValueVTs.size() == 1 &&
+         "PHIs with non-vector integer types should have a single VT.");
+  EVT IntVT = ValueVTs[0];
+
+  if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+    return;
+  IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+  unsigned BitWidth = IntVT.getSizeInBits();
+
+  unsigned DestReg = ValueMap[PN];
+  if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+    return;
+  LiveOutRegInfo.grow(DestReg);
+  LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+  Value *V = PN->getIncomingValue(0);
+  if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+    // Record no known bits and a single sign bit; skip remaining operands.
+    DestLOI.NumSignBits = 1;
+    DestLOI.KnownZero = DestLOI.KnownOne = APInt(BitWidth, 0);
+    return;
+  }
+
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+    DestLOI.NumSignBits = Val.getNumSignBits();
+    DestLOI.KnownZero = ~Val;
+    DestLOI.KnownOne = Val;
+  } else {
+    assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+                                "its CopyToReg node was created.");
+    unsigned SrcReg = ValueMap[V];
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+    if (!SrcLOI) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    DestLOI = *SrcLOI;
+  }
+
+  assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+         DestLOI.KnownOne.getBitWidth() == BitWidth &&
+         "Masks should have the same bit width as the type.");
+
+  for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *V = PN->getIncomingValue(i);
+    if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+      // Record no known bits and a single sign bit; skip remaining operands.
+      DestLOI.NumSignBits = 1;
+      DestLOI.KnownZero = DestLOI.KnownOne = APInt(BitWidth, 0);
+      return;
+    }
+
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+      DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+      DestLOI.KnownZero &= ~Val;
+      DestLOI.KnownOne &= Val;
+      continue;
+    }
+
+    assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+                                "its CopyToReg node was created.");
+    unsigned SrcReg = ValueMap[V];
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+    if (!SrcLOI) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+    DestLOI.KnownZero &= SrcLOI->KnownZero;
+    DestLOI.KnownOne &= SrcLOI->KnownOne;
+  }
+}
+
 /// setByValArgumentFrameIndex - Record frame index for the byval
 /// argument. This overrides previous frame index entry for this argument,
 /// if any.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 49c862ce3e0b..f08528fe2dc3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -87,7 +87,7 @@ class SelectionDAGLegalize {
     // If someone requests legalization of the new node, return itself.
     if (From != To)
       LegalizedNodes.insert(std::make_pair(To, To));
-    
+
     // Transfer SDDbgValues.
     DAG.TransferDbgValues(From, To);
   }
@@ -498,7 +498,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   int IncrementSize = NumBits / 8;
 
   // Divide the stored value in two parts.
-  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue ShiftAmount = DAG.getConstant(NumBits,
+                                      TLI.getShiftAmountTy(Val.getValueType()));
   SDValue Lo = Val;
   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 
@@ -645,7 +646,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   }
 
   // aggregate the two parts
-  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue ShiftAmount = DAG.getConstant(NumBits,
+                                       TLI.getShiftAmountTy(Hi.getValueType()));
   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
   Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
 
@@ -1264,7 +1266,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
         // Move the top bits to the right place.
         Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
-                         DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+                         DAG.getConstant(RoundWidth,
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
 
         // Join the hi and lo parts.
         Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1293,7 +1296,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
         // Move the top bits to the right place.
         Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
-                         DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+                         DAG.getConstant(ExtraWidth,
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
 
         // Join the hi and lo parts.
         Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1482,7 +1486,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                              DAG.getIntPtrConstant(IncrementSize));
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
-                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+                           DAG.getConstant(RoundWidth,
+                                    TLI.getShiftAmountTy(Tmp3.getValueType())));
           Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
                              ST->getPointerInfo().getWithOffset(IncrementSize),
                                  ExtraVT, isVolatile, isNonTemporal,
@@ -1492,7 +1497,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
           // Store the top RoundWidth bits.
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
-                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+                           DAG.getConstant(ExtraWidth,
+                                    TLI.getShiftAmountTy(Tmp3.getValueType())));
           Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
                                  RoundVT, isVolatile, isNonTemporal, Alignment);
 
@@ -1727,7 +1733,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
       assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
       if (BitShift)
         SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
-                              DAG.getConstant(BitShift,TLI.getShiftAmountTy()));
+                              DAG.getConstant(BitShift,
+                                 TLI.getShiftAmountTy(SignBit.getValueType())));
     }
   }
   // Now get the sign bit proper, by seeing whether the value is negative.
@@ -2207,7 +2214,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     if (!isSigned) {
       SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
 
-      SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+      SDValue ShiftConst =
+          DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
       SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
       SDValue AndConst = DAG.getConstant(1, MVT::i64);
       SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
@@ -2226,7 +2234,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     }
 
     // Otherwise, implement the fully general conversion.
-    EVT SHVT = TLI.getShiftAmountTy();
 
     SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
          DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
@@ -2241,6 +2248,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                    Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
                    ISD::SETUGE);
     SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+    EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
 
     SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
                              DAG.getConstant(32, SHVT));
@@ -2387,7 +2395,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
 ///
 SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
   EVT VT = Op.getValueType();
-  EVT SHVT = TLI.getShiftAmountTy();
+  EVT SHVT = TLI.getShiftAmountTy(VT);
   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(0 && "Unhandled Expand type in BSWAP!");
@@ -2450,7 +2458,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
   default: assert(0 && "Cannot expand this yet!");
   case ISD::CTPOP: {
     EVT VT = Op.getValueType();
-    EVT ShVT = TLI.getShiftAmountTy();
+    EVT ShVT = TLI.getShiftAmountTy(VT);
     unsigned Len = VT.getSizeInBits();
 
     assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
@@ -2487,7 +2495,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     Op = DAG.getNode(ISD::SRL, dl, VT,
                      DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                      DAG.getConstant(Len - 8, ShVT));
-    
+
     return Op;
   }
   case ISD::CTLZ: {
@@ -2501,7 +2509,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     //
     // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
     EVT VT = Op.getValueType();
-    EVT ShVT = TLI.getShiftAmountTy();
+    EVT ShVT = TLI.getShiftAmountTy(VT);
     unsigned len = VT.getSizeInBits();
     for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
       SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
@@ -2737,7 +2745,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     // SAR.  However, it is doubtful that any exist.
     EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
     EVT VT = Node->getValueType(0);
-    EVT ShiftAmountTy = TLI.getShiftAmountTy();
+    EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
     if (VT.isVector())
       ShiftAmountTy = VT;
     unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
@@ -2901,7 +2909,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       // 1 -> Hi
       Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
                          DAG.getConstant(OpTy.getSizeInBits()/2,
-                                         TLI.getShiftAmountTy()));
+                    TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
       Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
     } else {
       // 0 -> Lo
@@ -3260,7 +3268,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
       LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
       RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
-      
+
       SDValue Ret = ExpandLibCall(LC, Node, isSigned);
       BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
       TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
@@ -3268,7 +3276,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
     }
     if (isSigned) {
-      Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+      Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+                             TLI.getShiftAmountTy(BottomHalf.getValueType()));
       Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
       TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
                              ISD::SETNE);
@@ -3286,7 +3295,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
     Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
                        DAG.getConstant(PairTy.getSizeInBits()/2,
-                                       TLI.getShiftAmountTy()));
+                                       TLI.getShiftAmountTy(PairTy)));
     Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
     break;
   }
@@ -3464,7 +3473,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
     Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
     Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
-                          DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+                          DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
     Results.push_back(Tmp1);
     break;
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 27752123aac4..27a466b3a928 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -177,25 +177,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
   // First get the sign bit of second operand.
   SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
                                   DAG.getConstant(RSize - 1,
-                                                  TLI.getShiftAmountTy()));
+                                                  TLI.getShiftAmountTy(RVT)));
   SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
 
   // Shift right or sign-extend it if the two operands have different types.
   int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
   if (SizeDiff > 0) {
     SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
-                          DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+                          DAG.getConstant(SizeDiff,
+                                 TLI.getShiftAmountTy(SignBit.getValueType())));
     SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
   } else if (SizeDiff < 0) {
     SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
     SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
-                          DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+                          DAG.getConstant(-SizeDiff,
+                                 TLI.getShiftAmountTy(SignBit.getValueType())));
   }
 
   // Clear the sign bit of the first operand.
   SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
                                DAG.getConstant(LSize - 1,
-                                               TLI.getShiftAmountTy()));
+                                               TLI.getShiftAmountTy(LVT)));
   Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
   LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2fb2f2d8aa1e..9120288921e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1420,7 +1420,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
 /// the target's desired shift amount type.
 SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
   EVT OpTy = Op.getValueType();
-  MVT ShTy = TLI.getShiftAmountTy();
+  MVT ShTy = TLI.getShiftAmountTy(OpTy);
   if (OpTy == ShTy || OpTy.isVector()) return Op;
 
   ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ?  ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -2048,7 +2048,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
       return;
     }
     break;
-      
+
   default:
     // Allow the target to implement this method for its nodes.
     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2088,12 +2088,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
 
   case ISD::Constant: {
     const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
-    // If negative, return # leading ones.
-    if (Val.isNegative())
-      return Val.countLeadingOnes();
-
-    // Return # leading zeros.
-    return Val.countLeadingZeros();
+    return Val.getNumSignBits();
   }
 
   case ISD::SIGN_EXTEND:
@@ -2297,12 +2292,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
   if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
       !isa<ConstantSDNode>(Op.getOperand(1)))
     return false;
-  
-  if (Op.getOpcode() == ISD::OR && 
+
+  if (Op.getOpcode() == ISD::OR &&
       !MaskedValueIsZero(Op.getOperand(0),
                      cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
     return false;
-  
+
   return true;
 }
 
@@ -2753,7 +2748,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     // i8, which is easy to fall into in generic code that uses
     // TLI.getShiftAmount().
     assert(N2.getValueType().getSizeInBits() >=
-                   Log2_32_Ceil(N1.getValueType().getSizeInBits()) && 
+                   Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
            "Invalid use of small shift amount with oversized value!");
 
     // Always fold shifts of i1 values so the code generator doesn't need to
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 452f5614b7bf..48d9bbb5132e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -641,16 +641,17 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
       // If the source register was virtual and if we know something about it,
       // add an assert node.
       if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
-          !RegisterVT.isInteger() || RegisterVT.isVector() ||
-          !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i]))
+          !RegisterVT.isInteger() || RegisterVT.isVector())
+        continue;
+
+      const FunctionLoweringInfo::LiveOutInfo *LOI =
+        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+      if (!LOI)
         continue;
-      
-      const FunctionLoweringInfo::LiveOutInfo &LOI =
-        FuncInfo.LiveOutRegInfo[Regs[Part+i]];
 
       unsigned RegSize = RegisterVT.getSizeInBits();
-      unsigned NumSignBits = LOI.NumSignBits;
-      unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+      unsigned NumSignBits = LOI->NumSignBits;
+      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
 
       // FIXME: We capture more information than the dag can represent.  For
       // now, just use the tightest assertzext/assertsext possible.
@@ -908,7 +909,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                               Val.getResNo(), Offset, dl, DbgSDNodeOrder);
         DAG.AddDbgValue(SDV, Val.getNode(), false);
       }
-    } else 
+    } else
       DEBUG(dbgs() << "Dropping debug info for " << DI);
     DanglingDebugInfoMap[V] = DanglingDebugInfo();
   }
@@ -1417,7 +1418,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
   //     jle foo
   //
   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
-    if (!TLI.isJumpExpensive() && 
+    if (!TLI.isJumpExpensive() &&
         BOp->hasOneUse() &&
         (BOp->getOpcode() == Instruction::And ||
          BOp->getOpcode() == Instruction::Or)) {
@@ -1915,7 +1916,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
   DEBUG(dbgs() << "Lowering jump table\n"
                << "First entry: " << First << ". Last entry: " << Last << '\n'
                << "Range: " << Range
-               << "Size: " << TSize << ". Density: " << Density << "\n\n");
+               << ". Size: " << TSize << ". Density: " << Density << "\n\n");
 
   // Get the MachineFunction which holds the current MBB.  This is used when
   // inserting any additional MBBs necessary to represent the switch.
@@ -2408,19 +2409,19 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-  
-  MVT ShiftTy = TLI.getShiftAmountTy();
-  
+
+  MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
   // Coerce the shift amount to the right type if we can.
   if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
     unsigned ShiftSize = ShiftTy.getSizeInBits();
     unsigned Op2Size = Op2.getValueType().getSizeInBits();
     DebugLoc DL = getCurDebugLoc();
-    
+
     // If the operand is smaller than the shift count type, promote it.
     if (ShiftSize > Op2Size)
       Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
-    
+
     // If the operand is larger than the shift count type but the shift
     // count type has enough bits to represent any shift value, truncate
     // it now. This is a common case and it exposes the truncate to
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index a1a70c394a51..8f466d913bbb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -348,7 +348,7 @@ public:
   SDValue getControlRoot();
 
   DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-  void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; }
+
   unsigned getSDNodeOrder() const { return SDNodeOrder; }
 
   void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 62ebc81ef86e..68ba966d268a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -49,6 +49,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include <algorithm>
 using namespace llvm;
@@ -479,16 +480,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
     unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
     Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
     CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
-
-    // Only install this information if it tells us something.
-    if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
-      FuncInfo->LiveOutRegInfo.grow(DestReg);
-      FunctionLoweringInfo::LiveOutInfo &LOI =
-        FuncInfo->LiveOutRegInfo[DestReg];
-      LOI.NumSignBits = NumSignBits;
-      LOI.KnownOne = KnownOne;
-      LOI.KnownZero = KnownZero;
-    }
+    FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
   } while (!Worklist.empty());
 }
 
@@ -832,11 +824,39 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     FastIS = TLI.createFastISel(*FuncInfo);
 
   // Iterate over all basic blocks in the function.
-  for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
-    const BasicBlock *LLVMBB = &*I;
+  ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+  for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+       I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+    const BasicBlock *LLVMBB = *I;
 #ifndef NDEBUG
     CheckLineNumbers(LLVMBB);
 #endif
+
+    if (OptLevel != CodeGenOpt::None) {
+      bool AllPredsVisited = true;
+      for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+           PI != PE; ++PI) {
+        if (!FuncInfo->VisitedBBs.count(*PI)) {
+          AllPredsVisited = false;
+          break;
+        }
+      }
+
+      if (AllPredsVisited) {
+        for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
+             I != E && isa<PHINode>(I); ++I) {
+          FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+        }
+      } else {
+        for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
+             I != E && isa<PHINode>(I); ++I) {
+          FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+        }
+      }
+
+      FuncInfo->VisitedBBs.insert(LLVMBB);
+    }
+
     FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
     FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
 
@@ -851,17 +871,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       PrepareEHLandingPad();
 
     // Lower any arguments needed in this block if this is the entry block.
-    if (LLVMBB == &Fn.getEntryBlock()) {
-      for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end();
-           DBI != DBE; ++DBI) {
-        if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) {
-          const DebugLoc DL = DI->getDebugLoc();
-          SDB->setCurDebugLoc(DL);
-          break;
-        }
-      }
+    if (LLVMBB == &Fn.getEntryBlock())
       LowerArguments(LLVMBB);
-    }
 
     // Before doing SelectionDAG ISel, see if FastISel has been requested.
     if (FastIS) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 691390e2a0e4..35b847ccabfb 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -563,7 +563,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
 
   IsLittleEndian = TD->isLittleEndian();
-  ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
   memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
   memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
   maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
@@ -596,6 +596,10 @@ TargetLowering::~TargetLowering() {
   delete &TLOF;
 }
 
+MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
+  return MVT::getIntegerVT(8*TD->getPointerSize());
+}
+
 /// canOpTrap - Returns true if the operation can trap for the value type.
 /// VT must be a legal type.
 bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
@@ -1401,7 +1405,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                    BitWidth - InnerVT.getSizeInBits()) &
                DemandedMask) == 0 &&
             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
-          EVT ShTy = getShiftAmountTy();
+          EVT ShTy = getShiftAmountTy(InnerVT);
           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
             ShTy = InnerVT;
           SDValue NarrowShl =
@@ -2188,7 +2192,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       if (ConstantSDNode *AndRHS =
                   dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
         EVT ShiftTy = DCI.isBeforeLegalize() ?
-          getPointerTy() : getShiftAmountTy();
+          getPointerTy() : getShiftAmountTy(N0.getValueType());
         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
           // Perform the xform if the AND RHS is a single bit.
           if (AndRHS->getAPIntValue().isPowerOf2()) {
@@ -2359,7 +2363,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           // (Z-X) == X  --> Z == X<<1
           SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
                                      N1,
-                                     DAG.getConstant(1, getShiftAmountTy()));
+                       DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
           if (!DCI.isCalledByLegalizer())
             DCI.AddToWorklist(SH.getNode());
           return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
@@ -2381,7 +2385,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
           // X == (Z-X)  --> X<<1 == Z
           SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
-                                     DAG.getConstant(1, getShiftAmountTy()));
+                       DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
           if (!DCI.isCalledByLegalizer())
             DCI.AddToWorklist(SH.getNode());
           return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
@@ -2493,7 +2497,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
       }
     }
   }
-  
+
   return false;
 }
 
@@ -3141,14 +3145,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   // Shift right algebraic if shift value is nonzero
   if (magics.s > 0) {
     Q = DAG.getNode(ISD::SRA, dl, VT, Q,
-                    DAG.getConstant(magics.s, getShiftAmountTy()));
+                 DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
     if (Created)
       Created->push_back(Q.getNode());
   }
   // Extract the sign bit and add it to the quotient
   SDValue T =
     DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
-                                                 getShiftAmountTy()));
+                                           getShiftAmountTy(Q.getValueType())));
   if (Created)
     Created->push_back(T.getNode());
   return DAG.getNode(ISD::ADD, dl, VT, Q, T);
@@ -3192,19 +3196,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
     assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
            "We shouldn't generate an undefined shift!");
     return DAG.getNode(ISD::SRL, dl, VT, Q,
-                       DAG.getConstant(magics.s, getShiftAmountTy()));
+                 DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
   } else {
     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
     if (Created)
       Created->push_back(NPQ.getNode());
     NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
-                      DAG.getConstant(1, getShiftAmountTy()));
+                      DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
     if (Created)
       Created->push_back(NPQ.getNode());
     NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
     if (Created)
       Created->push_back(NPQ.getNode());
     return DAG.getNode(ISD::SRL, dl, VT, NPQ,
-                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+             DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
   }
 }
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 5663936bf3aa..fd5d50b7ecb8 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -167,6 +167,20 @@ void SplitAnalysis::calcLiveBlockInfo() {
   }
 }
 
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+  unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+  const LiveInterval &Orig = LIS.getInterval(OrigReg);
+  assert(!Orig.empty() && "Splitting empty interval?");
+  LiveInterval::const_iterator I = Orig.find(Idx);
+
+  // Range containing Idx should begin at Idx.
+  if (I != Orig.end() && I->start <= Idx)
+    return I->start == Idx;
+
+  // Range does not contain Idx, previous must end at Idx.
+  return I != Orig.begin() && (--I)->end == Idx;
+}
+
 void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const {
   for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) {
     unsigned count = UsingBlocks.lookup(*I);
@@ -947,10 +961,10 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
 
     openIntv();
     SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
-    if (BI.LastUse < BI.LastSplitPoint) {
+    if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) {
       useIntv(SegStart, leaveIntvAfter(BI.LastUse));
     } else {
-      // THe last use os after tha last valid split point.
+      // The last use is after the last valid split point.
       SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
       useIntv(SegStart, SegStop);
       overlapIntv(SegStop, BI.LastUse);
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 5c34afd1c819..e02e6297035d 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -125,6 +125,13 @@ public:
     return UsingBlocks.lookup(MBB);
   }
 
+  /// isOriginalEndpoint - Return true if the original live range was killed or
+  /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+  /// and 'use' for an early-clobber def.
+  /// This can be used to recognize code inserted by earlier live range
+  /// splitting.
+  bool isOriginalEndpoint(SlotIndex Idx) const;
+
   typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
 
   // Print a set of blocks with use counts.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 0b7bd98cc692..fa311dc5d66c 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -178,6 +178,10 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
 
 static SectionKind
 getELFKindForNamedSection(StringRef Name, SectionKind K) {
+  // FIXME: Why is this here? Codegen should not be in the business
+  // of figuring out section flags. If the user wrote section(".eh_frame"),
+  // we should just pass that to MC which will defer to the assembly
+  // or use its default if producing an object file.
   if (Name.empty() || Name[0] != '.') return K;
 
   // Some lame default implementation based on some magic section names.
@@ -203,6 +207,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
       Name.startswith(".llvm.linkonce.tb."))
     return SectionKind::getThreadBSS();
 
+  if (Name == ".eh_frame")
+    return SectionKind::getDataRel();
+
   return K;
 }
 
@@ -441,11 +448,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
 
   Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
   if (T.getOS() == Triple::Darwin) {
-    unsigned MajNum = T.getDarwinMajorNumber();
-    if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger
+    switch (T.getDarwinMajorNumber()) {
+    case 7:  // 10.3 Panther.
+    case 8:  // 10.4 Tiger.
       CommDirectiveSupportsAlignment = false;
-    if (MajNum > 9)                 // 10.6 SnowLeopard
-      IsFunctionEHSymbolGlobal = false;
+      break;
+    case 9:   // 10.5 Leopard.
+    case 10:  // 10.6 SnowLeopard.
+      break;
+    }
   }
 
   TargetLoweringObjectFile::Initialize(Ctx, TM);
@@ -630,7 +641,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const {
   // Parse the section specifier and create it if valid.
   StringRef Segment, Section;
-  unsigned TAA, StubSize;
+  unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0;
   std::string ErrorCode =
     MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
                                           TAA, StubSize);
@@ -643,10 +654,19 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
     return DataSection;
   }
 
+  bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES);
+  if (!TAAWasSet)
+    TAA = 0;      // Sensible default if this is a new section.
+    
   // Get the section.
   const MCSectionMachO *S =
     getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
 
+  // If TAA wasn't set by ParseSectionSpecifier() above,
+  // use the value returned by getMachOSection() as a default.
+  if (!TAAWasSet)
+    TAA = S->getTypeAndAttributes();
+
   // Okay, now that we got the section, verify that the TAA & StubSize agree.
   // If the user declared multiple globals with different section flags, we need
   // to reject it here.
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 458a2134bf4a..ec149dddc1d9 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -478,7 +478,8 @@ static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
   if (!RegKills[KReg])
     return;
 
-  assert(KillOps[KReg] == KillOp && "invalid superreg kill flags");
+  assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
+         "invalid superreg kill flags");
   KillOps[KReg] = NULL;
   RegKills.reset(KReg);
 
@@ -487,7 +488,8 @@ static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
   for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
     DEBUG(dbgs() << "  Resurrect subreg " << TRI->getName(*SR) << "\n");
 
-    assert(KillOps[*SR] == KillOp && "invalid subreg kill flags");
+    assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
+           "invalid subreg kill flags");
     KillOps[*SR] = NULL;
     RegKills.reset(*SR);
   }
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 8a00a16cfb4a..ea1629d30565 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -833,7 +833,11 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
     return true;
 
   const MCSymbol &A = Symbol.AliasedSymbol();
-  if (!A.isVariable() && A.isUndefined() && !Data.isCommon())
+  if (Symbol.isVariable() && !A.isVariable() && A.isUndefined())
+    return false;
+
+  bool IsGlobal = GetBinding(Data) == ELF::STB_GLOBAL;
+  if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
     return false;
 
   if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined())
@@ -1732,6 +1736,10 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
         assert(Modifier == MCSymbolRefExpr::VK_None);
         Type = ELF::R_X86_64_PC16;
         break;
+      case FK_PCRel_1:
+        assert(Modifier == MCSymbolRefExpr::VK_None);
+        Type = ELF::R_X86_64_PC8;
+        break;
       }
     } else {
       switch ((unsigned)Fixup.getKind()) {
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index cc1afbd08926..8199fb2e158a 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -65,6 +65,7 @@ MCAsmInfo::MCAsmInfo() {
   WeakDefDirective = 0;
   LinkOnceDirective = 0;
   HiddenVisibilityAttr = MCSA_Hidden;
+  HiddenDeclarationVisibilityAttr = MCSA_Hidden;
   ProtectedVisibilityAttr = MCSA_Protected;
   HasLEB128 = false;
   SupportsDebugInformation = false;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 13776f04437d..526ad0da42aa 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -45,6 +45,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
   HasAggressiveSymbolFolding = false;
 
   HiddenVisibilityAttr = MCSA_PrivateExtern;
+  HiddenDeclarationVisibilityAttr = MCSA_Invalid;
   // Doesn't support protected visibility.
   ProtectedVisibilityAttr = MCSA_Global;
   
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index cfeb56fa3dfd..2b0c73e80593 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -152,10 +152,23 @@ int EDOperand::evaluate(uint64_t &result,
       uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
       unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
       int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
-      //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
-      
+    
       uint64_t addr = 0;
         
+      unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+        
+      if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) {
+        unsigned fsID = Disassembler.registerIDWithName("FS");
+        unsigned gsID = Disassembler.registerIDWithName("GS");
+        
+        if (segmentReg == fsID ||
+            segmentReg == gsID) {
+          uint64_t segmentBase;
+          if (!callback(&segmentBase, segmentReg, arg))
+            addr += segmentBase;        
+        }
+      }
+        
       if (baseReg) {
         uint64_t baseVal;
         if (callback(&baseVal, baseReg, arg))
@@ -175,7 +188,7 @@ int EDOperand::evaluate(uint64_t &result,
       result = addr;
       return 0;
     }
-    }
+    } // switch (operandType)
     break;
   case Triple::arm:
   case Triple::thumb:
@@ -203,6 +216,7 @@ int EDOperand::evaluate(uint64_t &result,
       return 0;
     }
     }
+    break;
   }
   
   return -1;
diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
index 400e1649e970..de770b41ef35 100644
--- a/lib/MC/MCDisassembler/EDToken.cpp
+++ b/lib/MC/MCDisassembler/EDToken.cpp
@@ -194,6 +194,10 @@ int EDToken::tokenize(std::vector<EDToken*> &tokens,
     tokens.push_back(token);
   }
   
+  // Free any parsed operands.
+  for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i)
+    delete parsedOperands[i];
+
   return 0;
 }
 
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 035826690cdf..e67d9b03a95a 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -242,7 +242,23 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
 
 void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
                                         unsigned char Value) {
-  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+  int64_t Res;
+  if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
+    new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+    return;
+  }
+
+  MCSymbol *CurrentPos = getContext().CreateTempSymbol();
+  EmitLabel(CurrentPos);
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  const MCExpr *Ref =
+    MCSymbolRefExpr::Create(CurrentPos, Variant, getContext());
+  const MCExpr *Delta =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
+
+  if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
+    report_fatal_error("expected assembly-time absolute expression");
+  EmitFill(Res, Value, 0);
 }
 
 void MCObjectStreamer::Finish() {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index c6d0da609b3b..a84917ffb86a 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -603,6 +603,8 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     Lex(); // Eat the '('.
     return ParseParenExpr(Res, EndLoc);
   case AsmToken::LBrac:
+    if (!PlatformParser->HasBracketExpressions())
+      return TokError("brackets expression not supported on this target");
     Lex(); // Eat the '['.
     return ParseBracketExpr(Res, EndLoc);
   case AsmToken::Minus:
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index bfaf36a451b3..dcf689a6f0e7 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -30,9 +30,12 @@ class ELFAsmParser : public MCAsmParserExtension {
 
   bool ParseSectionSwitch(StringRef Section, unsigned Type,
                           unsigned Flags, SectionKind Kind);
+  bool SeenIdent;
 
 public:
-  ELFAsmParser() {}
+  ELFAsmParser() : SeenIdent(false) {
+    BracketExpressionsSupported = true;
+  }
 
   virtual void Initialize(MCAsmParser &Parser) {
     // Call the base implementation.
@@ -456,13 +459,12 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
                                SectionKind::getReadOnly(),
                                1, "");
 
-  static bool First = true;
-
   getStreamer().PushSection();
   getStreamer().SwitchSection(Comment);
-  if (First)
+  if (!SeenIdent) {
     getStreamer().EmitIntValue(0, 1);
-  First = false;
+    SeenIdent = true;
+  }
   getStreamer().EmitBytes(Data, 0);
   getStreamer().EmitIntValue(0, 1);
   getStreamer().PopSection();
diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp
index c30d3067da59..3f25a14926b6 100644
--- a/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -10,7 +10,8 @@
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
 using namespace llvm;
 
-MCAsmParserExtension::MCAsmParserExtension() {
+MCAsmParserExtension::MCAsmParserExtension() :
+  BracketExpressionsSupported(false) {
 }
 
 MCAsmParserExtension::~MCAsmParserExtension() {
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index b897c0bd6855..577e93aed6bc 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -101,16 +101,18 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
     return;
   }
 
-  OS << ',';
-
   unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
   assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
          "Invalid SectionType specified!");
 
-  if (SectionTypeDescriptors[SectionType].AssemblerName)
+  if (SectionTypeDescriptors[SectionType].AssemblerName) {
+    OS << ',';
     OS << SectionTypeDescriptors[SectionType].AssemblerName;
-  else
-    OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>";
+  } else {
+    // If we have no name for the attribute, stop here.
+    OS << '\n';
+    return;
+  }
 
   // If we don't have any attributes, we're done.
   unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
@@ -125,7 +127,9 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
 
   // Check each attribute to see if we have it.
   char Separator = ',';
-  for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) {
+  for (unsigned i = 0;
+       SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag;
+       ++i) {
     // Check to see if we have this attribute.
     if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
       continue;
@@ -207,7 +211,6 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
            "between 1 and 16 characters";
 
   // If there is no comma after the section, we're done.
-  TAA = 0;
   StubSize = 0;
   if (Comma.second.empty())
     return "";
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 3dcdba13135f..4b302c8602c9 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -20,8 +20,8 @@
 using namespace llvm;
 
 MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) {
-  PrevSectionStack.push_back(NULL);
-  CurSectionStack.push_back(NULL);
+  const MCSection *section = NULL;
+  SectionStack.push_back(std::make_pair(section, section));
 }
 
 MCStreamer::~MCStreamer() {
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 77033428b577..08f36d2af3a1 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1505,7 +1505,7 @@ APInt::ms APInt::magic() const {
       r2 = r2 - ad;
     }
     delta = ad - r2;
-  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
+  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
 
   mag.m = q2 + 1;
   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb88726d0de..7e2183d7cd5e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 18,
-    DomainMask    = 3 << DomainShift,
+    DomainMask    = 7 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
+    DomainNEONA8  = 4 << DomainShift,
 
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9f295302db0e..26f48b308316 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
     unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
     unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
     unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+    unsigned ARMSelectCallOp(const GlobalValue *GV);
 
     // Call handling routines.
   private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
   return true;
 }
 
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+  // Select the opcode for a direct call to GV. In Thumb mode the opcode
+  // depends on whether or not we're targeting an externally callable
+  // function. For libcalls we'll just pass a NULL GV in here, which is
+  // treated the same as an external function.
+  bool isExternal = false;
+  if (!GV || GV->hasExternalLinkage()) isExternal = true;
+  
+  // Darwin needs the r9 versions of the opcodes.
+  bool isDarwin = Subtarget->isTargetDarwin();
+  if (isThumb && isExternal) {
+    return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+  } else if (isThumb) {
+    return isDarwin ? ARM::tBLr9 : ARM::tBL;
+  } else  {
+    return isDarwin ? ARM::BLr9 : ARM::BL;
+  }
+}
+
 // A quick function that will emit a call for a named libcall in F with the
 // vector of passed arguments for the Instruction in I. We can assume that we
 // can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  unsigned CallOpc = ARMSelectCallOp(NULL);
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(CallOpc)))
                          .addExternalSymbol(TLI.getLibcallName(Call));
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(CallOpc))
           .addExternalSymbol(TLI.getLibcallName(Call)));
-  }
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
+  unsigned CallOpc = ARMSelectCallOp(GV);
   // Explicitly adding the predicate here.
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(CallOpc)))
           .addGlobalAddress(GV, 0, 0);
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(CallOpc))
           .addGlobalAddress(GV, 0, 0));
-  }
   
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db84fd3..68c33f098ec9 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
   // Move past area 3.
-  if (DPRCSSize > 0) MBBI++;
+  if (DPRCSSize > 0) {
+    MBBI++;
+    // Since vpush register list cannot have gaps, there may be multiple vpush
+    // instructions in the prologue. Never step past the end of the block.
+    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+      MBBI++;
+  }
 
   NumBytes = DPRCSOffset;
   if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
 
     // Increment past our save areas.
-    if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+    if (AFI->getDPRCalleeSavedAreaSize()) {
+      MBBI++;
+      // Since vpop register list cannot have gaps, there may be multiple vpop
+      // instructions in the epilogue. Never step past the end of the block.
+      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+        MBBI++;
+    }
     if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e91c53..e97ce50bc429 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
   // FIXME: Detect integer instructions properly.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = MI->getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
-      return false;
-  } else
+  if (TID.mayStore())
     return false;
-  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  return false;
 }
 
 ScheduleHazardRecognizer::HazardType
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cffdba34..f0d5a7d7c2e7 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
   bool SelectAddrMode5(SDValue N, SDValue &Base,
                        SDValue &Offset);
   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 
   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+                                            SDValue &Offset) {
+  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+  if (AM != ISD::POST_INC)
+    return false;
+  Offset = N;
+  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+      Offset = CurDAG->getRegister(0, MVT::i32);
+  }
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                        SDValue &Offset, SDValue &Label) {
   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0f0054..ab9f9e1571e3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     RC = ARM::GPRRegisterClass;
 
   // Transform the arguments stored in physical registers into virtual ones.
-  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
 
   SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, 0);
   } else {
-    Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   }
 
@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
         // Transform the arguments in physical registers into virtual ones.
-        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
       }
 
@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         else
           RC = ARM::GPRRegisterClass;
 
-        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,58 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
-  bool F2IisFast = Subtarget->isCortexA9() ||
-    Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+    Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+  if (UseNEON) {
+    // Use VBSL to copy the sign bit.
+    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+                               DAG.getTargetConstant(EncodedVal, MVT::i32));
+    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+    if (VT == MVT::f64)
+      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+                         DAG.getConstant(32, MVT::i32));
+    else /*if (VT == MVT::f32)*/
+      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+    if (SrcVT == MVT::f32) {
+      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+      if (VT == MVT::f64)
+        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+                           DAG.getConstant(32, MVT::i32));
+    } else if (VT == MVT::f32) {
+      // f64 sign source with an f32 result: move the sign from bit 63 down to
+      // bit 31 so that it lines up with the 32-bit sign mask.
+      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+                         DAG.getConstant(32, MVT::i32));
+    }
+    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+                                            MVT::i32);
+    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+    // Unpack Res according to the result type VT, not the sign source type.
+    if (VT == MVT::f32) {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+                        DAG.getConstant(0, MVT::i32));
+    } else {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+    }
+
+    return Res;
+  }
 
   // Bitcast operand 1 to i32.
   if (SrcVT == MVT::f64)
@@ -2847,37 +2897,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
                        &Tmp1, 1).getValue(1);
   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
 
-  // If float to int conversion isn't going to be super expensive, then simply
-  // or in the signbit.
-  if (F2IisFast) {
-    SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
-    SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
-    Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
-    if (VT == MVT::f32) {
-      Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
-                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
-      return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
-                         DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
-    }
-
-    // f64: Or the high part with signbit and then combine two parts.
-    Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
-                       &Tmp0, 1);
-    SDValue Lo = Tmp0.getValue(0);
-    SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
-    Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
-    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+  // Or in the signbit with integer operations.
+  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+  if (VT == MVT::f32) {
+    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   }
 
-  // Remove the signbit of operand 0.
-  Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
-  // If operand 1 signbit is one, then negate operand 0.
-  SDValue ARMcc;
-  SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
-                          ISD::SETLT, ARMcc, DAG, dl);
-  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+  // f64: Or the high part with signbit and then combine two parts.
+  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                     &Tmp0, 1);
+  SDValue Lo = Tmp0.getValue(0);
+  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
 }
 
 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2897,7 +2934,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   }
 
   // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 }
 
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 765cba42d0bd..359ac45cee1d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
 def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
-class Domain<bits<2> val> {
-  bits<2> Value = val;
+class Domain<bits<3> val> {
+  bits<3> Value = val;
 }
 def GenericDomain : Domain<0>;
 def VFPDomain     : Domain<1>; // Instructions in VFP domain only
 def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
 
 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
   let TSFlags{15-10} = Form;
   let TSFlags{16}    = isUnaryDataProc;
   let TSFlags{17}    = canXformTo16Bit;
-  let TSFlags{19-18} = D.Value;
+  let TSFlags{20-18} = D.Value;
 
   let Constraints = cstr;
   let Itinerary = itin;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c827ce3da97c..6e3fe2e039f5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
   let EncoderMethod = "getAddrMode6AddressOpValue";
 }
 
-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+                ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+                               [], [SDNPWantRoot]> {
   let PrintMethod = "printAddrMode6OffsetOperand";
   let MIOperandInfo = (ops GPR);
   let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1e2e5504e662..dc3d63e26ef5 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
 // ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+               PatFrag StoreOp, SDNode ExtractOp>
   : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
           "\\{$Vd[$lane]\\}, $Rn$Rm",
-          "$Rn.addr = $wb", []>;
+          "$Rn.addr = $wb",
+          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+                                  addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNWBPseudo<IIC_VST1lnu> {
+  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                                        addrmode6:$addr, am6offset:$offset))];
+}
 
-def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+                             NEONvgetlaneu> {
   let Inst{7-5} = lane{2-0};
 }
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+                             NEONvgetlaneu> {
   let Inst{7-6} = lane{1-0};
   let Inst{4}   = Rn{5};
 }
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+                             extractelt> {
   let Inst{7}   = lane{0};
   let Inst{5-4} = Rn{5-4};
 }
 
-def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
 //   VST2LN   : Vector Store (single 2-element structure from one lane)
 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 920c5c98002a..29902833f2bb 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS  : ASbIn<0b11100, 0b11, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSUBD  : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@ def VSUBS  : ASbIn<0b11100, 0b11, 1, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VDIVD  : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@ def VMULS  : ASbIn<0b11100, 0b10, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                   IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
                   [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@ def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
                   [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
 
@@ -305,9 +305,9 @@ def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fabs SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS  : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
 
@@ -423,9 +423,9 @@ def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fneg SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
   let Inst{7} = 1; // s32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
   let Inst{7} = 0; // u32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
                                  [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
                                  [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
                                            SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
                                            SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
                                            SPR:$Sdin))]>,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
              [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
                          RegConstraint<"$Sdin = $Sd">,
                   Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc  : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
                     IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                     [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                  RegConstraint<"$Sn = $Sd"> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // neverHasSideEffects
 
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740cfb28c..1465984899c6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
 
   // Materializable GVs (in JIT lazy compilation mode) do not require an extra
   // load from stub.
-  bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+  bool isDecl = GV->hasAvailableExternallyLinkage();
+  if (GV->isDeclaration() && !GV->isMaterializable())
+    isDecl = true;
 
   if (!isTargetDarwin()) {
     // Extra load is needed for all externally visible.
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb36e04..9a27e2f47064 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
 }
 
 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
   // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = TID.getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (TID.mayStore() || TID.mayLoad())
-      return false;
-  } else {
+  if (TID.mayStore())
     return false;
-  }
-
-  return MI->readsRegister(Reg, TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(Reg, TRI);
   return false;
 }
 
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bfaed9e..965665c2821a 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
   private:
     const TargetRegisterInfo *TRI;
     const ARMBaseInstrInfo *TII;
+    bool isA8;
 
     typedef DenseMap<unsigned, const MachineInstr*> RegMap;
 
@@ -43,6 +44,11 @@ namespace {
   char NEONMoveFixPass::ID = 0;
 }
 
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+  return (Domain & ARMII::DomainNEON) ||
+    (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
 bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
   RegMap Defs;
   bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
           Domain = ARMII::DomainNEON;
       }
 
-      if (Domain & ARMII::DomainNEON) {
+      if (inNEONDomain(Domain, isA8)) {
         // Convert VMOVD to VMOVDneon
         unsigned DestReg = MI->getOperand(0).getReg();
 
@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
 
   TRI = TM.getRegisterInfo();
   TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257f8fa1..9b1073be3c8e 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 bool
 Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const {
+  while (MBBI->isDebugValue()) {
+    ++MBBI;
+    if (MBBI == MBB.end())
+      return false;
+  }
+
   unsigned PredReg = 0;
   return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
 }
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 9137d654edba..c4f43ab9e4e7 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -48,7 +48,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
   // Set up the TargetLowering object.
   //I am having problems with shr n i8 1
-  setShiftAmountType(MVT::i64);
   setBooleanContents(ZeroOrOneBooleanContent);
 
   addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index b429e9fc1390..cb98f921dd68 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -31,25 +31,25 @@ namespace llvm {
 
       /// GPRelHi/GPRelLo - These represent the high and low 16-bit
       /// parts of a global address respectively.
-      GPRelHi, GPRelLo, 
+      GPRelHi, GPRelLo,
 
       /// RetLit - Literal Relocation of a Global
       RelLit,
 
       /// GlobalRetAddr - used to restore the return address
       GlobalRetAddr,
-      
+
       /// CALL - Normal call.
       CALL,
 
       /// DIVCALL - used for special library calls for div and rem
       DivCall,
-      
+
       /// return flag operand
       RET_FLAG,
 
       /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
-      /// corresponds to the COND_BRANCH pseudo instruction.  
+      /// corresponds to the COND_BRANCH pseudo instruction.
       /// *PRC is the input register to compare to zero,
       /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
       /// DESTBB is the destination block to branch to, and INFLAG is
@@ -62,7 +62,9 @@ namespace llvm {
   class AlphaTargetLowering : public TargetLowering {
   public:
     explicit AlphaTargetLowering(TargetMachine &TM);
-    
+
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
     /// getSetCCResultType - Get the SETCC result ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
@@ -92,7 +94,7 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::vector<unsigned> 
+    std::vector<unsigned>
       getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                         EVT VT) const;
 
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index dd27d0a0ff36..7c80eec3ba63 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -41,7 +41,6 @@ using namespace llvm;
 
 BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-  setShiftAmountType(MVT::i16);
   setBooleanContents(ZeroOrOneBooleanContent);
   setStackPointerRegisterToSaveRestore(BF::SP);
   setIntDivIsCheap(false);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 15a745fa8724..102c830688e2 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -32,6 +32,7 @@ namespace llvm {
   class BlackfinTargetLowering : public TargetLowering {
   public:
     BlackfinTargetLowering(TargetMachine &TM);
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
     virtual void ReplaceNodeResults(SDNode *N,
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e6511d008c2b..743a4d7a0f78 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -435,7 +435,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 
   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 
-  setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 
   setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -1219,7 +1218,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
       FuncInfo->setVarArgsFrameIndex(
         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
-      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
+      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                    false, false, 0);
@@ -2190,7 +2189,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
 {
   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
   DebugLoc dl = Op.getDebugLoc();
-  EVT ShiftVT = TLI.getShiftAmountTy();
+  EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
 
   assert(Op.getValueType() == MVT::i8);
   switch (Opc) {
@@ -3112,7 +3111,7 @@ SPUTargetLowering::getSingleConstraintMatchWeight(
   switch (*constraint) {
   default:
     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
-    break;
+    break;
     //FIXME: Seems like the supported constraint letters were just copied
     // from PPC, as the following doesn't correspond to the GCC docs.
     // I'm leaving it so until someone adds the corresponding lowering support.
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 95d44afe37c8..dd48d7bafaef 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -109,6 +109,8 @@ namespace llvm {
     /// getSetCCResultType - Return the ValueType for ISD::SETCC
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
     //! Custom lowering hooks
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
@@ -179,9 +181,9 @@ namespace llvm {
 
     virtual bool isLegalICmpImmediate(int64_t Imm) const;
 
-    virtual bool isLegalAddressingMode(const AddrMode &AM, 
+    virtual bool isLegalAddressingMode(const AddrMode &AM,
                                        const Type *Ty) const;
- 
+
     /// After allocating this many registers, the allocator should feel
     /// register pressure. The value is a somewhat random guess, based on the
     /// number of non callee saved registers in the C calling convention.
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 2f40bfc89601..f39826b1cf17 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -907,7 +907,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
 
       // Transform the arguments stored on
       // physical registers into virtual ones
-      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl);
+      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it has been passed promoted
@@ -973,7 +973,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
 
     for (; Start <= End; ++Start, ++StackLoc) {
       unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
-      unsigned LiveReg = MF.addLiveIn(Reg, RC, dl);
+      unsigned LiveReg = MF.addLiveIn(Reg, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
 
       int FI = MFI->CreateFixedObject(4, 0, true);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 30ef4f5da08e..a95d59c0576c 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -77,10 +77,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
   // Division is expensive
   setIntDivIsCheap(false);
 
-  // Even if we have only 1 bit shift here, we can perform
-  // shifts of the whole bitwidth 1 bit per step.
-  setShiftAmountType(MVT::i8);
-
   setStackPointerRegisterToSaveRestore(MSP430::SPW);
   setBooleanContents(ZeroOrOneBooleanContent);
   setSchedulingPreference(Sched::Latency);
@@ -330,7 +326,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       // Arguments passed in registers
       EVT RegVT = VA.getLocVT();
       switch (RegVT.getSimpleVT().SimpleTy) {
-      default: 
+      default:
         {
 #ifndef NDEBUG
           errs() << "LowerFormalArguments Unhandled argument type: "
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 673c5433b96e..19c9eac589f0 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,6 +73,8 @@ namespace llvm {
   public:
     explicit MSP430TargetLowering(MSP430TargetMachine &TM);
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 8f623b859b55..70d00e4b5cc5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -362,7 +362,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
   }
 
-  setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrOneBooleanContent);
 
   if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
@@ -1597,7 +1596,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       }
 
       // Transform the arguments stored in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
 
       InVals.push_back(ArgValue);
@@ -1689,7 +1688,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       // Get an existing live-in vreg, or add a new one.
       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
       if (!VReg)
-        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1708,7 +1707,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       // Get an existing live-in vreg, or add a new one.
       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
       if (!VReg)
-        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
+        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1872,7 +1871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       InVals.push_back(FIN);
       if (ObjSize==1 || ObjSize==2) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                             MachinePointerInfo(),
@@ -1891,7 +1890,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         // to memory.  ArgVal will be address of the beginning of
         // the object.
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -1914,7 +1913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
     case MVT::i32:
       if (!isPPC64) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
           ++GPR_idx;
         } else {
@@ -1928,7 +1927,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // FALLTHROUGH
     case MVT::i64:  // PPC64
       if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
 
         if (ObjectVT == MVT::i32) {
@@ -1966,9 +1965,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         unsigned VReg;
 
         if (ObjectVT == MVT::f32)
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
         else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
 
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++FPR_idx;
@@ -1986,7 +1985,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // Note that vector arguments in registers don't reserve stack space,
       // except in varargs functions.
       if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         if (isVarArg) {
           while ((ArgOffset % 16) != 0) {
@@ -2064,9 +2063,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       unsigned VReg;
 
       if (isPPC64)
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
       else
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 80cab75b960a..33daae9b5445 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -29,36 +29,36 @@ namespace llvm {
       /// FSEL - Traditional three-operand fsel node.
       ///
       FSEL,
-      
+
       /// FCFID - The FCFID instruction, taking an f64 operand and producing
       /// and f64 value containing the FP representation of the integer that
       /// was temporarily in the f64 operand.
       FCFID,
-      
-      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 
+
+      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
       /// operand, producing an f64 value containing the integer representation
       /// of that FP value.
       FCTIDZ, FCTIWZ,
-      
+
       /// STFIWX - The STFIWX instruction.  The first operand is an input token
       /// chain, then an f64 value to store, then an address to store it to.
       STFIWX,
-      
+
       // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
       // three v4f32 operands and producing a v4f32 result.
       VMADDFP, VNMSUBFP,
-      
+
       /// VPERM - The PPC VPERM Instruction.
       ///
       VPERM,
-      
+
       /// Hi/Lo - These represent the high and low 16-bit parts of a global
       /// address respectively.  These nodes have two operands, the first of
       /// which must be a TargetGlobalAddress, and the second of which must be a
       /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
       /// though these are usually folded into other nodes.
       Hi, Lo,
-      
+
       TOC_ENTRY,
 
       /// The following three target-specific nodes are used for calls through
@@ -80,37 +80,37 @@ namespace llvm {
       /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
       /// compute an allocation on the stack.
       DYNALLOC,
-      
+
       /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
       /// at function entry, used for PIC code.
       GlobalBaseReg,
-      
+
       /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
       /// shift amounts.  These nodes are generated by the multi-precision shift
       /// code.
       SRL, SRA, SHL,
-      
+
       /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
       /// registers.
       EXTSW_32,
 
       /// CALL - A direct function call.
       CALL_Darwin, CALL_SVR4,
-      
+
       /// NOP - Special NOP which follows 64-bit SVR4 calls.
       NOP,
 
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
       MTCTR,
-      
+
       /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
       /// BCTRL instruction.
       BCTRL_Darwin, BCTRL_SVR4,
-      
+
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
-      
+
       /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
       /// instructions.  This copies the bits corresponding to the specified
       /// CRREG into the resultant GPR.  Bits corresponding to other CR regs
@@ -122,20 +122,20 @@ namespace llvm {
       /// encoding for the OPC field to identify the compare.  For example, 838
       /// is VCMPGTSH.
       VCMP,
-      
+
       /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
-      /// altivec VCMP*o instructions.  For lack of better number, we use the 
+      /// altivec VCMP*o instructions.  For lack of better number, we use the
       /// opcode number encoding for the OPC field to identify the compare.  For
       /// example, 838 is VCMPGTSH.
       VCMPo,
-      
+
       /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
       /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
       /// condition register to branch on, OPC is the branch opcode to use (e.g.
       /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
       /// an optional input flag argument.
       COND_BRANCH,
-      
+
       // The following 5 instructions are used only as part of the
       // long double-to-int conversion sequence.
 
@@ -150,7 +150,7 @@ namespace llvm {
       MTFSB1,
 
       /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
-      /// rounding towards zero.  It has flags added so it won't move past the 
+      /// rounding towards zero.  It has flags added so it won't move past the
       /// FPSCR-setting instructions.
       FADDRTZ,
 
@@ -174,14 +174,14 @@ namespace llvm {
 
       /// STD_32 - This is the STD instruction for use with "32-bit" registers.
       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
-      
-      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a 
+
+      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
       /// i32.
-      STBRX, 
-      
-      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a 
+      STBRX,
+
+      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
       /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
@@ -194,7 +194,7 @@ namespace llvm {
     /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUHUM instruction.
     bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
-    
+
     /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUWUM instruction.
     bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
@@ -208,16 +208,16 @@ namespace llvm {
     /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
     bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary);
-    
+
     /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
     /// amount, otherwise return -1.
     int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
-    
+
     /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a splat of a single element that is suitable for input to
     /// VSPLTB/VSPLTH/VSPLTW.
     bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
-    
+
     /// isAllNegativeZeroVector - Returns true if all elements of build_vector
     /// are -0.0.
     bool isAllNegativeZeroVector(SDNode *N);
@@ -225,24 +225,26 @@ namespace llvm {
     /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
     /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
     unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
-    
+
     /// get_VSPLTI_elt - If this is a build_vector of constants which can be
     /// formed by using a vspltis[bhw] instruction of the specified element
     /// size, return the constant being splatted.  The ByteSize field indicates
     /// the number of bytes of each element [124] -> [bhw].
     SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
   }
-  
+
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &PPCSubTarget;
 
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
-    
+
     /// getTargetNodeName() - This method returns the name of a target specific
     /// DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
@@ -253,19 +255,19 @@ namespace llvm {
                                            SDValue &Offset,
                                            ISD::MemIndexedMode &AM,
                                            SelectionDAG &DAG) const;
-    
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
     /// can be represented as an indexed [r+r] operation.  Returns false if it
     /// can be more efficiently represented with [r+imm].
     bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                              SelectionDAG &DAG) const;
-    
+
     /// SelectAddressRegImm - Returns true if the address N can be represented
     /// by a base register plus a signed 16-bit displacement [r+imm], and if it
     /// is not better represented as reg+reg.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                              SelectionDAG &DAG) const;
-    
+
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
     bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
@@ -277,7 +279,7 @@ namespace llvm {
     bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
                                   SelectionDAG &DAG) const;
 
-    
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -289,10 +291,10 @@ namespace llvm {
                                     SelectionDAG &DAG) const;
 
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-    
+
     virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                                 const APInt &Mask,
-                                                APInt &KnownZero, 
+                                                APInt &KnownZero,
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth = 0) const;
@@ -300,13 +302,13 @@ namespace llvm {
     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
-    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, 
+    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
                                         MachineBasicBlock *MBB, bool is64Bit,
                                         unsigned BinOpcode) const;
-    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, 
-                                                MachineBasicBlock *MBB, 
+    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+                                                MachineBasicBlock *MBB,
                                             bool is8bit, unsigned Opcode) const;
-    
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
     /// Examine constraint string and operand type and determine a weight value.
@@ -314,7 +316,7 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::pair<unsigned, const TargetRegisterClass*> 
+    std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
 
@@ -329,11 +331,11 @@ namespace llvm {
                                               char ConstraintLetter,
                                               std::vector<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
-    
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
-    
+
     /// isLegalAddressImmediate - Return true if the integer value can be used
     /// as the offset of the target addressing mode for load / store of the
     /// given type.
@@ -344,7 +346,7 @@ namespace llvm {
     virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-    
+
     /// getOptimalMemOpType - Returns the target specific optimal type for load
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. If DstAlign is zero that means it's safe to destination
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4e14fbbb09ba..f85914b61d9d 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -254,6 +254,20 @@ unsigned long reverse(unsigned v) {
 
 //===---------------------------------------------------------------------===//
 
+[LOOP DELETION]
+
+We don't delete this output-free loop, because trip count analysis doesn't
+realize that it is finite (if it were infinite, it would be undefined).  Not
+having this blocks Loop Idiom from matching strlen and friends.
+
+void foo(char *C) {
+  int x = 0;
+  while (*C)
+    ++x,++C;
+}
+
+//===---------------------------------------------------------------------===//
+
 [LOOP RECOGNITION]
 
 These idioms should be recognized as popcount (see PR1488):
@@ -287,6 +301,16 @@ unsigned int popcount(unsigned int input) {
   return count;
 }
 
+This should be recognized as CLZ:  rdar://8459039
+
+unsigned clz_a(unsigned a) {
+  int i;
+  for (i=0;i<32;i++)
+    if (a & (1<<(31-i)))
+      return i;
+  return 32;
+}
+
 This sort of thing should be added to the loop idiom pass.
 
 //===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index ee292758d186..4b12852ef873 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -79,6 +79,7 @@ namespace {
     MachineBasicBlock::iterator
     findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
 
+    bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
 
   };
   char Filler::ID = 0;
@@ -91,6 +92,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
   return new Filler(tm);
 }
 
+
 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
 /// We assume there is only one delay slot per delayed instruction.
 ///
@@ -112,6 +114,13 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
         BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
       else
         MBB.splice(++J, &MBB, D);
+      unsigned structSize = 0;
+      if (needsUnimp(I, structSize)) {
+        MachineBasicBlock::iterator J = I;
+        ++J; //skip the delay filler.
+        BuildMI(MBB, ++J, I->getDebugLoc(),
+                TII->get(SP::UNIMP)).addImm(structSize);
+      }
     }
   return Changed;
 }
@@ -287,6 +296,28 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
 {
   if (candidate == MBB.begin())
     return false;
+  if (candidate->getOpcode() == SP::UNIMP)
+    return true;
   const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
   return prevdesc.hasDelaySlot();
 }
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+  if (!I->getDesc().isCall())  // Not a call: nothing to report.
+    return false;
+  // Choose which operand to inspect for the struct size, per call opcode.
+  unsigned structSizeOpNum = 0;
+  switch (I->getOpcode()) {
+  default: llvm_unreachable("Unknown call opcode.");
+  case SP::CALL: structSizeOpNum = 1; break;
+  case SP::JMPLrr:
+  case SP::JMPLri: structSizeOpNum = 2; break;
+  }
+  // Only an immediate operand carries a size; StructSize is untouched otherwise.
+  const MachineOperand &MO = I->getOperand(structSizeOpNum);
+  if (!MO.isImm())
+    return false;
+  StructSize = MO.getImm();
+  return true;
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 196b87dd58d0..70574c370f35 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -16,7 +16,9 @@
 #include "SparcISelLowering.h"
 #include "SparcTargetMachine.h"
 #include "SparcMachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -116,6 +118,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     // Guarantee that all emitted copies are stuck together with flags.
     Flag = Chain.getValue(1);
   }
+
+  unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
   // If the function returns a struct, copy the SRetReturnReg to I0
   if (MF.getFunction()->hasStructRetAttr()) {
     SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
@@ -127,11 +131,16 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     Flag = Chain.getValue(1);
     if (MF.getRegInfo().liveout_empty())
       MF.getRegInfo().addLiveOut(SP::I0);
+    RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
   }
 
+  SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+
   if (Flag.getNode())
-    return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
-  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain);
+    return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+                       RetAddrOffsetNode, Flag);
+  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, 
+                     RetAddrOffsetNode);
 }
 
 /// LowerFormalArguments - V8 uses a very simple ABI, where all values are
@@ -194,7 +203,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
                               false, false, 0);
         } else {
           unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
-                                        &SP::IntRegsRegClass, dl);
+                                        &SP::IntRegsRegClass);
           LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
         }
         SDValue WholeValue =
@@ -393,6 +402,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<SDValue, 8> MemOpChains;
 
   const unsigned StackOffset = 92;
+  bool hasStructRetAttr = false;
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
        i != e;
@@ -433,6 +443,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                          MachinePointerInfo(),
                                          false, false, 0));
+      hasStructRetAttr = true;
       continue;
     }
 
@@ -546,6 +557,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     InFlag = Chain.getValue(1);
   }
 
+  unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+
   // If the callee is a GlobalAddress node (quite common, every direct call is)
   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
   // Likewise ExternalSymbol -> TargetExternalSymbol.
@@ -559,6 +572,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
+  if (hasStructRetAttr)
+    Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
     unsigned Reg = RegsToPass[i].first;
     if (Reg >= SP::I0 && Reg <= SP::I7)
@@ -600,7 +615,29 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   return Chain;
 }
 
+unsigned
+SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
+{
+  const Function *CalleeFn = 0;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    CalleeFn = dyn_cast<Function>(G->getGlobal());
+  } else if (ExternalSymbolSDNode *E =
+             dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    const Function *Fn = DAG.getMachineFunction().getFunction();
+    const Module *M = Fn->getParent();
+    CalleeFn = M->getFunction(E->getSymbol());
+  }
+  // No Function could be found for Callee: report a size of 0.
+  if (!CalleeFn)
+    return 0;
 
+  assert(CalleeFn->hasStructRetAttr() &&
+         "Callee does not have the StructRet attribute.");
+  // The size is the alloc size of what the callee's first argument points to.
+  const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+  const Type *ElementTy = Ty->getElementType();
+  return getTargetData()->getTypeAllocSize(ElementTy);
+}
 
 //===----------------------------------------------------------------------===//
 // TargetLowering Implementation
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 849e4010af6b..7d02df8adcca 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -101,6 +101,8 @@ namespace llvm {
 
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+    unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
   };
 } // end namespace llvm
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 107232357b3b..cf5c48fd18d9 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -124,7 +124,8 @@ def call          : SDNode<"SPISD::CALL", SDT_SPCall,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPVariadic]>;
 
-def retflag       : SDNode<"SPISD::RET_FLAG", SDTNone,
+def SDT_SPRet     : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def retflag       : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
                            [SDNPHasChain, SDNPOptInGlue]>;
 
 def flushw        : SDNode<"SPISD::FLUSHW", SDTNone,
@@ -132,7 +133,7 @@ def flushw        : SDNode<"SPISD::FLUSHW", SDTNone,
 
 def getPCX        : Operand<i32> {
   let PrintMethod = "printGetPCX";
-}  
+}
 
 //===----------------------------------------------------------------------===//
 // SPARC Flag Conditions
@@ -232,6 +233,9 @@ let hasSideEffects = 1, mayStore = 1 in {
                    [(flushw)]>;
 }
 
+def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+                "unimp $val", []>;
+
 // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the 
 // fpmover pass.
 let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
@@ -292,11 +296,13 @@ let usesCustomInserter = 1, Uses = [FCC] in {
 // Section A.3 - Synthetic Instructions, p. 85
 // special cases of JMPL:
 let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
-  let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
-    def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+  let rd = O7.Num, rs1 = G0.Num in
+    def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+                   "jmp %o7+$val", [(retflag simm13:$val)]>;
 
-  let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in
-    def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>;
+  let rd = I7.Num, rs1 = G0.Num in
+    def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+                  "jmp %i7+$val", []>;
 }
 
 // Section B.1 - Load Integer Instructions, p. 90
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index d694f2e67edc..90939c312065 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -59,9 +59,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
   // Compute derived properties from the register classes
   computeRegisterProperties();
 
-  // Set shifts properties
-  setShiftAmountType(MVT::i64);
-
   // Provide all sorts of operation actions
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 51d2df3a3008..30192420dcb6 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -57,6 +57,8 @@ namespace llvm {
   public:
     explicit SystemZTargetLowering(SystemZTargetMachine &TM);
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 1cac07a0e10a..8fe549ba3126 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
       delete &Op;
     }
   }
+  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+      Operands.size() == 3) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    if (Op.isMem() && Op.Mem.SegReg == 0 &&
+        isa<MCConstantExpr>(Op.Mem.Disp) &&
+        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+      SMLoc Loc = Op.getEndLoc();
+      Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+      delete &Op;
+    }
+  }
   
   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
   // "shift <op>".
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 691e2d7204ab..f7777561b6a7 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
     switch (insn.displacementSize) {
     default:
       break;
-    case 8:
+    case 1:
       type = TYPE_MOFFS8;
       break;
-    case 16:
+    case 2:
       type = TYPE_MOFFS16;
       break;
-    case 32:
+    case 4:
       type = TYPE_MOFFS32;
       break;
-    case 64:
+    case 8:
       type = TYPE_MOFFS64;
       break;
     }
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 4f4fbcdd394c..d0dc8b56aea5 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -399,7 +399,7 @@ struct InternalInstruction {
   /* The segment override type */
   SegmentOverride segmentOverride;
   
-  /* Sizes of various critical pieces of data */
+  /* Sizes of various critical pieces of data, in bytes */
   uint8_t registerSize;
   uint8_t addressSize;
   uint8_t displacementSize;
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index c10e1709f667..abd1515cf5d7 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1879,39 +1879,71 @@ _add32carry:
 
 //===---------------------------------------------------------------------===//
 
-This:
-char t(char c) {
-  return c/3;
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+  if (P[0] != Q[0])
+     return P[0] < Q[0];
+  if (P[1] != Q[1])
+     return P[1] < Q[1];
+  if (P[2] != Q[2])
+     return P[2] < Q[2];
+   return P[3] < Q[3];
 }
 
-Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
+In the real code, we get a lot more wrong than this.  However, even in this
+code we generate:
 
-_t:                                     ## @t
-	movslq	%edi, %rax
-	imulq	$-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB
-	shrq	$32, %rcx
-	addl	%ecx, %eax
-	movl	%eax, %ecx
-	shrl	$31, %ecx
-	shrl	%eax
-	addl	%ecx, %eax
-	movsbl	%al, %eax
+_foo:                                   ## @foo
+## BB#0:                                ## %entry
+	movb	(%rsi), %al
+	movb	(%rdi), %cl
+	cmpb	%al, %cl
+	je	LBB0_2
+LBB0_1:                                 ## %if.then
+	cmpb	%al, %cl
+	jmp	LBB0_5
+LBB0_2:                                 ## %if.end
+	movb	1(%rsi), %al
+	movb	1(%rdi), %cl
+	cmpb	%al, %cl
+	jne	LBB0_1
+## BB#3:                                ## %if.end38
+	movb	2(%rsi), %al
+	movb	2(%rdi), %cl
+	cmpb	%al, %cl
+	jne	LBB0_1
+## BB#4:                                ## %if.end60
+	movb	3(%rdi), %al
+	cmpb	3(%rsi), %al
+LBB0_5:                                 ## %if.end60
+	setl	%al
+	movzbl	%al, %eax
 	ret
 
-GCC gets:
+Note that we generate jumps to LBB0_1 which does a redundant compare.  The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare.  In contrast, GCC 4.2 produces:
 
-_t:
-	movl	$86, %eax
-	imulb	%dil
-	shrw	$8, %ax
-	sarb	$7, %dil
-	subb	%dil, %al
-	movsbl	%al,%eax
+_foo:
+	movzbl	(%rsi), %eax
+	cmpb	%al, (%rdi)
+	jne	L10
+L12:
+	movzbl	1(%rsi), %eax
+	cmpb	%al, 1(%rdi)
+	jne	L10
+	movzbl	2(%rsi), %eax
+	cmpb	%al, 2(%rdi)
+	jne	L10
+	movzbl	3(%rdi), %eax
+	cmpb	3(%rsi), %al
+L10:
+	setl	%al
+	movzbl	%al, %eax
 	ret
 
-which is nicer.  This also happens for int, not just char.
+which is "perfect".
 
 //===---------------------------------------------------------------------===//
 
-
-
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 9d42ac2e470c..6fa928462b28 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
         (AM.Base.Reg != 0 || AM.IndexReg != 0))
       return false;
 
-    // Can't handle TLS or DLLImport.
+    // Can't handle DLLImport.
+    if (GV->hasDLLImportLinkage())
+      return false;
+
+    // Can't handle TLS.
     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
-      if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
+      if (GVar->isThreadLocal())
         return false;
 
     // Okay, we've committed to selecting this global. Set up the basic address.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 27024b4e9e5a..2f49dbcebf3c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -45,7 +45,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/VectorExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -56,10 +55,6 @@ using namespace dwarf;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 
-static cl::opt<bool>
-Disable256Bit("disable-256bit", cl::Hidden,
-              cl::desc("Disable use of 256-bit vectors"));
-
 // Forward declarations.
 static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
@@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
 
   // X86 is weird, it always uses i8 for shift amounts and setcc results.
-  setShiftAmountType(MVT::i8);
   setBooleanContents(ZeroOrOneBooleanContent);
   setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(X86StackPtr);
@@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
       else
         llvm_unreachable("Unknown argument type!");
 
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                                   DAG.getIntPtrConstant(Offset));
         unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
-                                     X86::GR64RegisterClass, dl);
+                                     X86::GR64RegisterClass);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SmallVector<SDValue, 11> SaveXMMOps;
         SaveXMMOps.push_back(Chain);
 
-        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl);
+        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
         SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
         SaveXMMOps.push_back(ALVal);
 
@@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
 
         for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
           unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
-                                       X86::VR128RegisterClass, dl);
+                                       X86::VR128RegisterClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
           SaveXMMOps.push_back(Val);
         }
@@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::MOVSD:
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
+  case X86ISD::VUNPCKLPS:
+  case X86ISD::VUNPCKLPD:
+  case X86ISD::VUNPCKLPSY:
+  case X86ISD::VUNPCKLPDY:
   case X86ISD::PUNPCKLWD:
   case X86ISD::PUNPCKLBW:
   case X86ISD::PUNPCKLDQ:
@@ -2760,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
   case X86ISD::MOVSD:
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
+  case X86ISD::VUNPCKLPS:
+  case X86ISD::VUNPCKLPD:
+  case X86ISD::VUNPCKLPSY:
+  case X86ISD::VUNPCKLPDY:
   case X86ISD::PUNPCKLWD:
   case X86ISD::PUNPCKLBW:
   case X86ISD::PUNPCKLDQ:
@@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
   SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
   return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(Opc, dl, ShVT, SrcOp,
-                             DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+                             DAG.getConstant(NumBits,
+                                  TLI.getShiftAmountTy(SrcOp.getValueType()))));
 }
 
 SDValue
@@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   // For AVX-length vectors, build the individual 128-bit pieces and
   // use shuffles to put them in place.
-  if (VT.getSizeInBits() > 256 && 
-      Subtarget->hasAVX() && 
-      !Disable256Bit &&
+  if (VT.getSizeInBits() > 256 &&
+      Subtarget->hasAVX() &&
       !ISD::isBuildVectorAllZeros(Op.getNode())) {
     SmallVector<SDValue, 8> V;
     V.resize(NumElems);
     for (unsigned i = 0; i < NumElems; ++i) {
       V[i] = Op.getOperand(i);
     }
- 
+
     EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
 
     // Build the lower subvector.
@@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
                            DAG.getIntPtrConstant(Elt1 / 2));
       if ((Elt1 & 1) == 0)
         InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
-                             DAG.getConstant(8, TLI.getShiftAmountTy()));
+                             DAG.getConstant(8,
+                                  TLI.getShiftAmountTy(InsElt.getValueType())));
       else if (Elt0 >= 0)
         InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
                              DAG.getConstant(0xFF00, MVT::i16));
@@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
                                     Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
       if ((Elt0 & 1) != 0)
         InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
-                              DAG.getConstant(8, TLI.getShiftAmountTy()));
+                              DAG.getConstant(8,
+                                 TLI.getShiftAmountTy(InsElt0.getValueType())));
       else if (Elt1 >= 0)
         InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
                              DAG.getConstant(0x00FF, MVT::i16));
@@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   // Both of them can't be memory operations though.
   if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
     CanFoldLoad = false;
-  
+
   if (CanFoldLoad) {
     if (HasSSE2 && NumElems == 2)
       return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
     SDValue ScaledN2 = N2;
     if (Upper)
       ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
-                             DAG.getConstant(NumElems / 
+                             DAG.getConstant(NumElems /
                                              (VT.getSizeInBits() / 128),
                                              N2.getValueType()));
     Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
@@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::MOVSS:              return "X86ISD::MOVSS";
   case X86ISD::UNPCKLPS:           return "X86ISD::UNPCKLPS";
   case X86ISD::UNPCKLPD:           return "X86ISD::UNPCKLPD";
+  case X86ISD::VUNPCKLPS:          return "X86ISD::VUNPCKLPS";
+  case X86ISD::VUNPCKLPD:          return "X86ISD::VUNPCKLPD";
+  case X86ISD::VUNPCKLPSY:         return "X86ISD::VUNPCKLPSY";
+  case X86ISD::VUNPCKLPDY:         return "X86ISD::VUNPCKLPDY";
   case X86ISD::UNPCKHPS:           return "X86ISD::UNPCKHPS";
   case X86ISD::UNPCKHPD:           return "X86ISD::UNPCKHPD";
   case X86ISD::PUNPCKLBW:          return "X86ISD::PUNPCKLBW";
@@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::PUNPCKLQDQ:
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
+  case X86ISD::VUNPCKLPS:
+  case X86ISD::VUNPCKLPD:
+  case X86ISD::VUNPCKLPSY:
+  case X86ISD::VUNPCKLPDY:
   case X86ISD::MOVHLPS:
   case X86ISD::MOVLHPS:
   case X86ISD::PSHUFD:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 419da3742cf8..6ec4a7de7558 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -159,16 +159,16 @@ namespace llvm {
 
       /// PSHUFB - Shuffle 16 8-bit values within a vector.
       PSHUFB,
-      
+
       /// PANDN - and with not'd value.
       PANDN,
-      
+
       /// PSIGNB/W/D - Copy integer sign.
-      PSIGNB, PSIGNW, PSIGND, 
-      
+      PSIGNB, PSIGNW, PSIGND,
+
       /// PBLENDVB - Variable blend
       PBLENDVB,
-      
+
       /// FMAX, FMIN - Floating point max and min.
       ///
       FMAX, FMIN,
@@ -212,7 +212,7 @@ namespace llvm {
       // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
       ADD, SUB, ADC, SBB, SMUL,
       INC, DEC, OR, XOR, AND,
-      
+
       UMUL, // LOW, HI, FLAGS = umul LHS, RHS
 
       // MUL_IMM - X86 specific multiply by immediate.
@@ -248,6 +248,10 @@ namespace llvm {
       MOVSS,
       UNPCKLPS,
       UNPCKLPD,
+      VUNPCKLPS,
+      VUNPCKLPD,
+      VUNPCKLPSY,
+      VUNPCKLPDY,
       UNPCKHPS,
       UNPCKHPD,
       PUNPCKLBW,
@@ -463,6 +467,8 @@ namespace llvm {
 
     virtual unsigned getJumpTableEncoding() const;
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } // Always i8; LHSTy is unused.
+
     virtual const MCExpr *
     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                               const MachineBasicBlock *MBB, unsigned uid,
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 344c14c112a0..0660072589e4 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -41,6 +41,8 @@ def MRM_F8 : Format<41>;
 def MRM_F9 : Format<42>;
 def RawFrmImm8 : Format<43>;
 def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
 
 // ImmType - This specifies the immediate type used by an instruction. This is
 // part of the ad-hoc solution used to emit machine instruction encodings by our
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ceb1b6539826..76a9b12b8aad 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::IMUL32rri8,      X86::IMUL32rmi8, 0 },
     { X86::IMUL64rri32,     X86::IMUL64rmi32, 0 },
     { X86::IMUL64rri8,      X86::IMUL64rmi8, 0 },
-    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
-    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
     { X86::Int_COMISDrr,    X86::Int_COMISDrm, 0 },
     { X86::Int_COMISSrr,    X86::Int_COMISSrm, 0 },
     { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm, 16 },
@@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::IMUL16rr,        X86::IMUL16rm, 0 },
     { X86::IMUL32rr,        X86::IMUL32rm, 0 },
     { X86::IMUL64rr,        X86::IMUL64rm, 0 },
+    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
+    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
     { X86::MAXPDrr,         X86::MAXPDrm, 16 },
     { X86::MAXPDrr_Int,     X86::MAXPDrm_Int, 16 },
     { X86::MAXPSrr,         X86::MAXPSrm, 16 },
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 1d4420787273..fcb5a25104ac 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -311,6 +311,8 @@ namespace X86II {
     MRM_F0 = 40,
     MRM_F8 = 41,
     MRM_F9 = 42,
+    MRM_D0 = 45,
+    MRM_D1 = 46,
 
     /// RawFrmImm8 - This is used for the ENTER instruction, which has two
     /// immediates, the first of which is a 16-bit immediate (specified by
@@ -577,6 +579,8 @@ namespace X86II {
     case X86II::MRM_F0:
     case X86II::MRM_F8:
     case X86II::MRM_F9:
+    case X86II::MRM_D0:
+    case X86II::MRM_D1:
       return -1;
     }
   }
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 87dc4bece742..f832a7c85a8a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">;
 def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
 def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
 
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
 def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
 def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
 def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 1a58ba0f96ef..6a24d145c696 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
 def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
 def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
 
+let Defs = [RDX, RAX], Uses = [RCX] in // No explicit operands: RCX is an implicit use, RDX and RAX implicit defs.
+  def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in // No explicit operands: RDX, RAX and RCX are implicit uses; no defs are listed.
+  def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index e6dc74e65d79..0e3b5711f2b5 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     EmitByte(BaseOpcode, CurByte, OS);
     EmitByte(0xF9, CurByte, OS);
     break;
+  case X86II::MRM_D0:
+    EmitByte(BaseOpcode, CurByte, OS);
+    EmitByte(0xD0, CurByte, OS);
+    break;
+  case X86II::MRM_D1:
+    EmitByte(BaseOpcode, CurByte, OS);
+    EmitByte(0xD1, CurByte, OS);
+    break;
   }
 
   // If there is a remaining operand, it must be a trailing immediate.  Emit it
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index de768561f111..1ee73123bbc6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
   assert((!Is64Bit || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
-  // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and 
-  // for all 64-bit targets.
-  if (isTargetDarwin() || isTargetLinux() || Is64Bit)
+  // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+  // 32 and 64 bit) and for all 64-bit targets.
+  if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+      isTargetSolaris() || Is64Bit)
     stackAlignment = 16;
 
   if (StackAlignment)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8a119b43cd91..0a62a029554c 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -166,6 +166,8 @@ public:
   bool hasVectorUAMem() const { return HasVectorUAMem; }
 
   bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+  bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
+  bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
 
   // ELF is a reasonably sane default and the only other X86 targets we
   // support are Darwin and Windows. Just use "not those".
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 828d6f92caf4..4817787d7515 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -42,9 +42,9 @@
 using namespace llvm;
 
 const char *XCoreTargetLowering::
-getTargetNodeName(unsigned Opcode) const 
+getTargetNodeName(unsigned Opcode) const
 {
-  switch (Opcode) 
+  switch (Opcode)
   {
     case XCoreISD::BL                : return "XCoreISD::BL";
     case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
@@ -77,7 +77,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   // Division is expensive
   setIntDivIsCheap(false);
 
-  setShiftAmountType(MVT::i32);
   setStackPointerRegisterToSaveRestore(XCore::SP);
 
   setSchedulingPreference(Sched::RegPressure);
@@ -95,7 +94,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
 
   // Stop the combiner recombining select and set_cc
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-  
+
   // 64bit
   setOperationAction(ISD::ADD, MVT::i64, Custom);
   setOperationAction(ISD::SUB, MVT::i64, Custom);
@@ -106,14 +105,14 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
   setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
   setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-  
+
   // Bit Manipulation
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
   setOperationAction(ISD::ROTR , MVT::i32, Expand);
-  
+
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
-  
+
   // Jump tables.
   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
 
@@ -122,7 +121,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
 
   // Thread Local Storage
   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-  
+
   // Conversion of i64 -> double produces constantpool nodes
   setOperationAction(ISD::ConstantPool, MVT::i32,   Custom);
 
@@ -143,7 +142,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
   setOperationAction(ISD::VAARG, MVT::Other, Custom);
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
-  
+
   // Dynamic stack
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -163,7 +162,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
 
 SDValue XCoreTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
-  switch (Op.getOpcode()) 
+  switch (Op.getOpcode())
   {
   case ISD::GlobalAddress:    return LowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -414,7 +413,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = LD->getChain();
   SDValue BasePtr = LD->getBasePtr();
   DebugLoc DL = Op.getDebugLoc();
-  
+
   SDValue Base;
   int64_t Offset;
   if (!LD->isVolatile() &&
@@ -437,10 +436,10 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
     SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
     SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
-    
+
     SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
     SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
-    
+
     SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
                               LowAddr, MachinePointerInfo(), false, false, 0);
     SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
@@ -453,7 +452,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     SDValue Ops[] = { Result, Chain };
     return DAG.getMergeValues(Ops, 2, DL);
   }
-  
+
   if (LD->getAlignment() == 2) {
     SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
                                  BasePtr, LD->getPointerInfo(), MVT::i16,
@@ -473,16 +472,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     SDValue Ops[] = { Result, Chain };
     return DAG.getMergeValues(Ops, 2, DL);
   }
-  
+
   // Lower to a call to __misaligned_load(BasePtr).
   const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  
+
   Entry.Ty = IntPtrTy;
   Entry.Node = BasePtr;
   Args.push_back(Entry);
-  
+
   std::pair<SDValue, SDValue> CallResult =
         LowerCallTo(Chain, IntPtrTy, false, false,
                     false, false, 0, CallingConv::C, false,
@@ -515,7 +514,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
   SDValue BasePtr = ST->getBasePtr();
   SDValue Value = ST->getValue();
   DebugLoc dl = Op.getDebugLoc();
-  
+
   if (ST->getAlignment() == 2) {
     SDValue Low = Value;
     SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
@@ -532,19 +531,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
                                           ST->isNonTemporal(), 2);
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
   }
-  
+
   // Lower to a call to __misaligned_store(BasePtr, Value).
   const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  
+
   Entry.Ty = IntPtrTy;
   Entry.Node = BasePtr;
   Args.push_back(Entry);
-  
+
   Entry.Node = Value;
   Args.push_back(Entry);
-  
+
   std::pair<SDValue, SDValue> CallResult =
         LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
                     false, false, 0, CallingConv::C, false,
@@ -722,7 +721,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
   }
 
   DebugLoc dl = N->getDebugLoc();
-  
+
   // Extract components
   SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                             N->getOperand(0),  DAG.getConstant(0, MVT::i32));
@@ -732,7 +731,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
                              N->getOperand(1), DAG.getConstant(0, MVT::i32));
   SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                              N->getOperand(1), DAG.getConstant(1, MVT::i32));
-  
+
   // Expand
   unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
                                                    XCoreISD::LSUB;
@@ -740,7 +739,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
   SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
                                   LHSL, RHSL, Zero);
   SDValue Lo(Carry.getNode(), 1);
-  
+
   SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
                                   LHSH, RHSH, Carry);
   SDValue Hi(Ignored.getNode(), 1);
@@ -761,8 +760,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
                                Node->getOperand(1), MachinePointerInfo(V),
                                false, false, 0);
   // Increment the pointer, VAList, to the next vararg
-  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, 
-                     DAG.getConstant(VT.getSizeInBits(), 
+  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+                     DAG.getConstant(VT.getSizeInBits(),
                                      getPointerTy()));
   // Store the incremented VAList to the legalized pointer
   Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
@@ -781,20 +780,20 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const
   MachineFunction &MF = DAG.getMachineFunction();
   XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
   SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
-  return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), 
+  return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
                       MachinePointerInfo(), false, false, 0);
 }
 
 SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
-  // Depths > 0 not supported yet! 
+  // Depths > 0 not supported yet!
   if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
     return SDValue();
-  
+
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
-  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, 
+  return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                             RegInfo->getFrameRegister(MF), MVT::i32);
 }
 
@@ -919,7 +918,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
-  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, 
+  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
                                  getPointerTy(), true));
 
   SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
@@ -944,8 +943,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
         break;
     }
-    
-    // Arguments that can be passed on register must be kept at 
+
+    // Arguments that can be passed on register must be kept at
     // RegsToPass vector
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -954,7 +953,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
       int Offset = VA.getLocMemOffset();
 
-      MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, 
+      MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
                                         Chain, Arg,
                                         DAG.getConstant(Offset/4, MVT::i32)));
     }
@@ -963,16 +962,16 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   // Transform all store nodes into one single node because
   // all store nodes are independent of each other.
   if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOpChains[0], MemOpChains.size());
 
-  // Build a sequence of copy-to-reg nodes chained together with token 
+  // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
   // The InFlag in necessary since all emited instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                              RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
@@ -986,7 +985,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
 
   // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
-  //             = Chain, Callee, Reg#1, Reg#2, ...  
+  //             = Chain, Callee, Reg#1, Reg#2, ...
   //
   // Returns a chain & a flag for retval copy to use.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -994,7 +993,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   Ops.push_back(Chain);
   Ops.push_back(Callee);
 
-  // Add argument registers to the end of the list so that they are 
+  // Add argument registers to the end of the list so that they are
   // known live into the call.
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
@@ -1098,11 +1097,11 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
   unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
 
   unsigned LRSaveSize = StackSlotSize;
-  
+
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
 
     CCValAssign &VA = ArgLocs[i];
-    
+
     if (VA.isRegLoc()) {
       // Arguments passed in registers
       EVT RegVT = VA.getLocVT();
@@ -1139,12 +1138,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
-      InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, 
+      InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
-  
+
   if (isVarArg) {
     /* Argument registers */
     static const unsigned ArgRegs[] = {
@@ -1186,7 +1185,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
                                true));
     }
   }
-  
+
   return Chain;
 }
 
@@ -1222,7 +1221,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
   // Analize return values.
   CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
 
-  // If this is the first return lowered for this function, add 
+  // If this is the first return lowered for this function, add
   // the regs to the liveout set for the function.
   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
     for (unsigned i = 0; i != RVLocs.size(); ++i)
@@ -1237,7 +1236,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // guarantee that all emitted copies are
@@ -1265,7 +1264,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   DebugLoc dl = MI->getDebugLoc();
   assert((MI->getOpcode() == XCore::SELECT_CC) &&
          "Unexpected instr type to insert");
-  
+
   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
   // control-flow pattern.  The incoming instruction knows the destination vreg
   // to set, the condition code register to branch on, the true/false values to
@@ -1273,7 +1272,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   MachineFunction::iterator It = BB;
   ++It;
-  
+
   //  thisMBB:
   //  ...
   //   TrueVal = ...
@@ -1296,7 +1295,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   // Next, add the true and fallthrough blocks as its successors.
   BB->addSuccessor(copy0MBB);
   BB->addSuccessor(sinkMBB);
-  
+
   BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
     .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
 
@@ -1304,10 +1303,10 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   //   %FalseValue = ...
   //   # fallthrough to sinkMBB
   BB = copy0MBB;
-  
+
   // Update machine-CFG edges
   BB->addSuccessor(sinkMBB);
-  
+
   //  sinkMBB:
   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
@@ -1316,7 +1315,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
           TII.get(XCore::PHI), MI->getOperand(0).getReg())
     .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
     .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
-  
+
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
 }
@@ -1354,7 +1353,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
 
     // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
     // low bit set
-    if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { 
+    if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
       APInt KnownZero, KnownOne;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
@@ -1377,7 +1376,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     EVT VT = N0.getValueType();
 
     // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
-    if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {   
+    if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
       APInt KnownZero, KnownOne;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
@@ -1393,7 +1392,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
 
     // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
     // low bit set
-    if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { 
+    if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
       APInt KnownZero, KnownOne;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
@@ -1557,7 +1556,7 @@ static inline bool isImmUs4(int64_t val)
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
 bool
-XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                               const Type *Ty) const {
   if (Ty->getTypeID() == Type::VoidTyID)
     return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
@@ -1568,7 +1567,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
     return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
                  AM.BaseOffs%4 == 0;
   }
-  
+
   switch (Size) {
   case 1:
     // reg + imm
@@ -1593,7 +1592,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
     // reg + reg<<2
     return AM.Scale == 4 && AM.BaseOffs == 0;
   }
-  
+
   return false;
 }
 
@@ -1603,7 +1602,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
 
 std::vector<unsigned> XCoreTargetLowering::
 getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const 
+                                  EVT VT) const
 {
   if (Constraint.size() != 1)
     return std::vector<unsigned>();
@@ -1611,9 +1610,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
   switch (Constraint[0]) {
     default : break;
     case 'r':
-      return make_vector<unsigned>(XCore::R0, XCore::R1,  XCore::R2, 
-                                   XCore::R3, XCore::R4,  XCore::R5, 
-                                   XCore::R6, XCore::R7,  XCore::R8, 
+      return make_vector<unsigned>(XCore::R0, XCore::R1,  XCore::R2,
+                                   XCore::R3, XCore::R4,  XCore::R5,
+                                   XCore::R6, XCore::R7,  XCore::R8,
                                    XCore::R9, XCore::R10, XCore::R11, 0);
       break;
   }
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 7e5dd2e8e512..bb3f2cc038e7 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -20,11 +20,11 @@
 #include "XCore.h"
 
 namespace llvm {
-  
+
   // Forward delcarations
   class XCoreSubtarget;
   class XCoreTargetMachine;
-  
+
   namespace XCoreISD {
     enum NodeType {
       // Start the numbering where the builtin ops and target ops leave off.
@@ -38,16 +38,16 @@ namespace llvm {
 
       // dp relative address
       DPRelativeWrapper,
-      
+
       // cp relative address
       CPRelativeWrapper,
-      
+
       // Store word to stack
       STWSP,
 
       // Corresponds to retsp instruction
       RETSP,
-      
+
       // Corresponds to LADD instruction
       LADD,
 
@@ -74,13 +74,14 @@ namespace llvm {
   //===--------------------------------------------------------------------===//
   // TargetLowering Implementation
   //===--------------------------------------------------------------------===//
-  class XCoreTargetLowering : public TargetLowering 
+  class XCoreTargetLowering : public TargetLowering
   {
   public:
 
     explicit XCoreTargetLowering(XCoreTargetMachine &TM);
 
     virtual unsigned getJumpTableEncoding() const;
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } // Always i32; LHSTy is unused.
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -91,10 +92,10 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                     SelectionDAG &DAG) const;
 
-    /// getTargetNodeName - This method returns the name of a target specific 
+    /// getTargetNodeName - This method returns the name of a target specific
     //  DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
-  
+
     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
@@ -108,7 +109,7 @@ namespace llvm {
   private:
     const XCoreTargetMachine &TM;
     const XCoreSubtarget &Subtarget;
-  
+
     // Lower Operand helpers
     SDValue LowerCCCArguments(SDValue Chain,
                               CallingConv::ID CallConv,
@@ -148,12 +149,12 @@ namespace llvm {
     SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
-  
+
     // Inline asm support
     std::vector<unsigned>
     getRegClassForInlineAsmConstraint(const std::string &Constraint,
               EVT VT) const;
-  
+
     // Expand specifics
     SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
     SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 38cc734ce7c3..ecdd4cb63000 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -727,7 +727,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
                  "neg $dst, $b",
                  [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
 
-// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp,
+// TODO setd, eet, eef, testwct, tinitpc, tinitdp,
 // tinitsp, tinitcp, tsetmr, sext (reg), zext (reg)
 let Constraints = "$src1 = $dst" in {
 let neverHasSideEffects = 1 in
@@ -758,6 +758,14 @@ def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
                  "getr $dst, $type",
                  [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
 
+def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+                 "getts $dst, res[$r]",
+                 [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+
+def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                 "setpt res[$r], $val",
+                 [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+
 def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                  "outct res[$r], $val",
                  [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
@@ -774,6 +782,11 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                  "out res[$r], $val",
                  [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
 
+let Constraints = "$src = $dst" in // $dst is tied to $src, so the asm string below names only $src.
+def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+                 "outshr res[$r], $src",
+                 [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+
 def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
                  "inct $dst, res[$r]",
                  [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
@@ -786,6 +799,11 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
                  "in $dst, res[$r]",
                  [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
 
+let Constraints = "$src = $dst" in // $dst is tied to $src, so the asm string below names only $dst.
+def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+                 "inshr $dst, res[$r]",
+                 [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+
 def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                  "chkct res[$r], $val",
                  [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
@@ -799,7 +817,7 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                  [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
 
 // Two operand long
-// TODO settw, setclk, setrdy, setpsc, endin, peek,
+// TODO setclk, setrdy, setpsc, endin, peek,
 // getd, testlcl, tinitlr, getps, setps
 def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
                  "bitrev $dst, $src",
@@ -813,13 +831,17 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
                  "clz $dst, $src",
                  [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
 
-def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val),
+def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                   "setc res[$r], $val",
                   [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
 
+def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                  "settw res[$r], $val",
+                  [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+
 // One operand short
-// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp
-// setdp, setcp, setv, setev, kcall
+// TODO edu, waitet, waitef, tstart, msync, mjoin, clrtp
+// setdp, setcp, setev, kcall
 // dgetreg
 let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
 def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
@@ -859,20 +881,41 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
                  [(XCoreBranchLink GRRegs:$addr)]>;
 }
 
+def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
+                 "syncr res[$r]",
+                 [(int_xcore_syncr GRRegs:$r)]>;
+
 def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
                "freer res[$r]",
                [(int_xcore_freer GRRegs:$r)]>;
 
+let Uses=[R11] in
+def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
+               "setv res[$r], r11",
+               [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
+               "eeu res[$r]",
+               [(int_xcore_eeu GRRegs:$r)]>;
+
 // Zero operand short
-// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
 // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
 // dentsp, drestsp
 
+def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+
 let Defs = [R11] in
 def GETID_0R : _F0R<(outs), (ins),
                  "get r11, id",
                  [(set R11, (int_xcore_getid))]>;
 
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+    hasSideEffects = 1 in
+def WAITEU_0R : _F0R<(outs), (ins),
+                 "waiteu",
+                 [(brind (int_xcore_waitevent))]>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b6b6b84d9647..7986d1aca762 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1897,6 +1897,39 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         return BinaryOperator::CreateNot(And);
       }
 
+  // Canonicalize xor to the RHS.
+  if (match(Op0, m_Xor(m_Value(), m_Value())))
+    std::swap(Op0, Op1);
+
+  // A | ( A ^ B) -> A |  B
+  // A | (~A ^ B) -> A | ~B
+  if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+    if (Op0 == A || Op0 == B)
+      return BinaryOperator::CreateOr(A, B);
+
+    if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
+      Value *Not = Builder->CreateNot(B, B->getName()+".not");
+      return BinaryOperator::CreateOr(Not, Op0);
+    }
+    if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
+      Value *Not = Builder->CreateNot(A, A->getName()+".not");
+      return BinaryOperator::CreateOr(Not, Op0);
+    }
+  }
+
+  // A | ~(A | B) -> A | ~B
+  // A | ~(A ^ B) -> A | ~B
+  if (match(Op1, m_Not(m_Value(A))))
+    if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
+      if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
+          Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
+                               B->getOpcode() == Instruction::Xor)) {
+        Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
+                                                 B->getOperand(0);
+        Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+        return BinaryOperator::CreateOr(Not, Op0);
+      }
+
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
       if (Value *Res = FoldOrOfICmps(LHS, RHS))
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8449f7b7982c..0e464507a7e4 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -953,10 +953,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     if (Callee->isDeclaration() && !isConvertible) return false;
   }
 
-  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
-      Callee->isDeclaration())
-    return false;   // Do not delete arguments unless we have a function body.
-
+  if (Callee->isDeclaration()) {
+    // Do not delete arguments unless we have a function body.
+    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
+      return false;
+
+    // If the callee is just a declaration, don't change the varargsness of the
+    // call.  We don't want to introduce a varargs call where one doesn't
+    // already exist.
+    const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+      return false;
+  }
+      
   if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
       !CallerPAL.isEmpty())
     // In this case we have more arguments than the new function type, but we
@@ -970,8 +979,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         return false;
     }
 
+  
   // Okay, we decided that this is a safe thing to do: go ahead and start
-  // inserting cast instructions as necessary...
+  // inserting cast instructions as necessary.
   std::vector<Value*> Args;
   Args.reserve(NumActualArgs);
   SmallVector<AttributeWithIndex, 8> attrVec;
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 6d1d344a9296..753a558cfe83 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -78,7 +78,6 @@ bool LoopDeletion::IsLoopDead(Loop* L,
                               SmallVector<BasicBlock*, 4>& exitingBlocks,
                               SmallVector<BasicBlock*, 4>& exitBlocks,
                               bool &Changed, BasicBlock *Preheader) {
-  BasicBlock* exitingBlock = exitingBlocks[0];
   BasicBlock* exitBlock = exitBlocks[0];
   
   // Make sure that all PHI entries coming from the loop are loop invariant.
@@ -88,11 +87,21 @@ bool LoopDeletion::IsLoopDead(Loop* L,
   // of the loop.
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
-    Value* incoming = P->getIncomingValueForBlock(exitingBlock);
+    Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+
+    // Make sure all exiting blocks produce the same incoming value for the exit
+    // block.  If there are different incoming values for different exiting
+    // blocks, then it is impossible to statically determine which value should
+    // be used.
+    for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+      if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
+        return false;
+    }
+      
     if (Instruction* I = dyn_cast<Instruction>(incoming))
       if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
         return false;
-      
+
     ++BI;
   }
   
@@ -147,10 +156,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   if (exitBlocks.size() != 1)
     return false;
   
-  // Loops with multiple exits are too complicated to handle correctly.
-  if (exitingBlocks.size() != 1)
-    return false;
-  
   // Finally, we have to check that the loop really is dead.
   bool Changed = false;
   if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
@@ -166,7 +171,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   // Now that we know the removal is safe, remove the loop by changing the
   // branch from the preheader to go to the single exit block.  
   BasicBlock* exitBlock = exitBlocks[0];
-  BasicBlock* exitingBlock = exitingBlocks[0];
   
   // Because we're deleting a large chunk of code at once, the sequence in which
   // we remove things is very important to avoid invalidation issues.  Don't
@@ -183,9 +187,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
 
   // Rewrite phis in the exit block to get their inputs from
   // the preheader instead of the exiting block.
+  BasicBlock* exitingBlock = exitingBlocks[0];
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
     P->replaceUsesOfWith(exitingBlock, preheader);
+    for (unsigned i = 1; i < exitingBlocks.size(); ++i)
+      P->removeIncomingValue(exitingBlocks[i]);
     ++BI;
   }
   
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d7fa149492bd..f8ce214750ac 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -31,6 +31,11 @@
 //   void foo(_Complex float *P)
 //     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
 //
+// We should enhance this to handle negative strides through memory.
+// Alternatively (and perhaps better) we could rely on an earlier pass to force
+// forward iteration through memory, which is generally better for cache
+// behavior.  Negative strides *do* happen for memset/memcpy loops.
+//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
@@ -272,10 +277,17 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
   unsigned StoreSize = (unsigned)SizeInBits >> 3; 
   const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
   
-  // TODO: Could also handle negative stride here someday, that will require the
-  // validity check in mayLoopAccessLocation to be updated though.
-  if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
+  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+    // TODO: Could also handle negative stride here someday; that will require
+    // the validity check in mayLoopAccessLocation to be updated, though.
+    // Change the "0 &&" below to "1 &&" to print exact negative strides.
+    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
+      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
+      dbgs() << "BB: " << *SI->getParent();
+    }
+    
     return false;
+  }
 
   // See if we can optimize just this store in isolation.
   if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index ec45b71dd368..9f136d4e3077 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -25,13 +25,14 @@
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/config.h"            // FIXME: Shouldn't depend on host!
 using namespace llvm;
 
 STATISTIC(NumSimplified, "Number of library calls simplified");
@@ -1369,6 +1370,8 @@ namespace {
   /// This pass optimizes well known library functions from libc and libm.
   ///
   class SimplifyLibCalls : public FunctionPass {
+    TargetLibraryInfo *TLI;
+    
     StringMap<LibCallOptimization*> Optimizations;
     // String and Memory LibCall Optimizations
     StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
@@ -1385,7 +1388,7 @@ namespace {
     SPrintFOpt SPrintF; PrintFOpt PrintF;
     FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
     PutsOpt Puts;
-
+    
     bool Modified;  // This is only used by doInitialization.
   public:
     static char ID; // Pass identification
@@ -1402,14 +1405,20 @@ namespace {
     void setDoesNotAlias(Function &F, unsigned n);
     bool doInitialization(Module &M);
 
+    void inferPrototypeAttributes(Function &F);
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetLibraryInfo>();
     }
   };
-  char SimplifyLibCalls::ID = 0;
 } // end anonymous namespace.
 
-INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
-                "Simplify well-known library calls", false, false)
+char SimplifyLibCalls::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls",
+                      "Simplify well-known library calls", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls",
+                    "Simplify well-known library calls", false, false)
 
 // Public interface to the Simplify LibCalls pass.
 FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1441,9 +1450,9 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["strcspn"] = &StrCSpn;
   Optimizations["strstr"] = &StrStr;
   Optimizations["memcmp"] = &MemCmp;
-  Optimizations["memcpy"] = &MemCpy;
+  if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy;
   Optimizations["memmove"] = &MemMove;
-  Optimizations["memset"] = &MemSet;
+  if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet;
 
   // _chk variants of String and Memory LibCall Optimizations.
   Optimizations["__strcpy_chk"] = &StrCpyChk;
@@ -1506,6 +1515,8 @@ void SimplifyLibCalls::InitOptimizations() {
 /// runOnFunction - Top level algorithm.
 ///
 bool SimplifyLibCalls::runOnFunction(Function &F) {
+  TLI = &getAnalysis<TargetLibraryInfo>();
+
   if (Optimizations.empty())
     InitOptimizations();
 
@@ -1597,688 +1608,654 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
   }
 }
 
+
+void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
+  const FunctionType *FTy = F.getFunctionType();
+  
+  StringRef Name = F.getName();
+  switch (Name[0]) {
+  case 's':
+    if (Name == "strlen") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "strchr" ||
+               Name == "strrchr") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isIntegerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+    } else if (Name == "strcpy" ||
+               Name == "stpcpy" ||
+               Name == "strcat" ||
+               Name == "strtol" ||
+               Name == "strtod" ||
+               Name == "strtof" ||
+               Name == "strtoul" ||
+               Name == "strtoll" ||
+               Name == "strtold" ||
+               Name == "strncat" ||
+               Name == "strncpy" ||
+               Name == "strtoull") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strxfrm") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strcmp" ||
+               Name == "strspn" ||
+               Name == "strncmp" ||
+               Name == "strcspn" ||
+               Name == "strcoll" ||
+               Name == "strcasecmp" ||
+               Name == "strncasecmp") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strstr" ||
+               Name == "strpbrk") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strtok" ||
+               Name == "strtok_r") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "scanf" ||
+               Name == "setbuf" ||
+               Name == "setvbuf") {
+      if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "strdup" ||
+               Name == "strndup") {
+      if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "stat" ||
+               Name == "sscanf" ||
+               Name == "sprintf" ||
+               Name == "statvfs") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "snprintf") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 3);
+    } else if (Name == "setitimer") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+      setDoesNotCapture(F, 3);
+    } else if (Name == "system") {
+      if (FTy->getNumParams() != 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "system" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'm':
+    if (Name == "malloc") {
+      if (FTy->getNumParams() != 1 ||
+          !FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "memcmp") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "memchr" ||
+               Name == "memrchr") {
+      if (FTy->getNumParams() != 3)
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+    } else if (Name == "modf" ||
+               Name == "modff" ||
+               Name == "modfl" ||
+               Name == "memcpy" ||
+               Name == "memccpy" ||
+               Name == "memmove") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "memalign") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotAlias(F, 0);
+    } else if (Name == "mkdir" ||
+               Name == "mktime") {
+      if (FTy->getNumParams() == 0 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'r':
+    if (Name == "realloc") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "read") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      // May throw; "read" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 2);
+    } else if (Name == "rmdir" ||
+               Name == "rewind" ||
+               Name == "remove" ||
+               Name == "realpath") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "rename" ||
+               Name == "readlink") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'w':
+    if (Name == "write") {
+      if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      // May throw; "write" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'b':
+    if (Name == "bcopy") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "bcmp") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setOnlyReadsMemory(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "bzero") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'c':
+    if (Name == "calloc") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "chmod" ||
+               Name == "chown" ||
+               Name == "ctermid" ||
+               Name == "clearerr" ||
+               Name == "closedir") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'a':
+    if (Name == "atoi" ||
+        Name == "atol" ||
+        Name == "atof" ||
+        Name == "atoll") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setOnlyReadsMemory(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "access") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'f':
+    if (Name == "fopen") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "fdopen") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "feof" ||
+               Name == "free" ||
+               Name == "fseek" ||
+               Name == "ftell" ||
+               Name == "fgetc" ||
+               Name == "fseeko" ||
+               Name == "ftello" ||
+               Name == "fileno" ||
+               Name == "fflush" ||
+               Name == "fclose" ||
+               Name == "fsetpos" ||
+               Name == "flockfile" ||
+               Name == "funlockfile" ||
+               Name == "ftrylockfile") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "ferror") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setOnlyReadsMemory(F);
+    } else if (Name == "fputc" ||
+               Name == "fstat" ||
+               Name == "frexp" ||
+               Name == "frexpf" ||
+               Name == "frexpl" ||
+               Name == "fstatvfs") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "fgets") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 3);
+    } else if (Name == "fread" ||
+               Name == "fwrite") {
+      if (FTy->getNumParams() != 4 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(3)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 4);
+    } else if (Name == "fputs" ||
+               Name == "fscanf" ||
+               Name == "fprintf" ||
+               Name == "fgetpos") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'g':
+    if (Name == "getc" ||
+        Name == "getlogin_r" ||
+        Name == "getc_unlocked") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "getenv") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setOnlyReadsMemory(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "gets" ||
+               Name == "getchar") {
+      setDoesNotThrow(F);
+    } else if (Name == "getitimer") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "getpwnam") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'u':
+    if (Name == "ungetc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "uname" ||
+               Name == "unlink" ||
+               Name == "unsetenv") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "utime" ||
+               Name == "utimes") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'p':
+    if (Name == "putc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "puts" ||
+               Name == "printf" ||
+               Name == "perror") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "pread" ||
+               Name == "pwrite") {
+      if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      // May throw; these are valid pthread cancellation points.
+      setDoesNotCapture(F, 2);
+    } else if (Name == "putchar") {
+      setDoesNotThrow(F);
+    } else if (Name == "popen") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "pclose") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'v':
+    if (Name == "vscanf") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "vsscanf" ||
+               Name == "vfscanf") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "valloc") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "vprintf") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "vfprintf" ||
+               Name == "vsprintf") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "vsnprintf") {
+      if (FTy->getNumParams() != 4 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 3);
+    }
+    break;
+  case 'o':
+    if (Name == "open") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "open" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    } else if (Name == "opendir") {
+      if (FTy->getNumParams() != 1 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 't':
+    if (Name == "tmpfile") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "times") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'h':
+    if (Name == "htonl" ||
+        Name == "htons") {
+      setDoesNotThrow(F);
+      setDoesNotAccessMemory(F);
+    }
+    break;
+  case 'n':
+    if (Name == "ntohl" ||
+        Name == "ntohs") {
+      setDoesNotThrow(F);
+      setDoesNotAccessMemory(F);
+    }
+    break;
+  case 'l':
+    if (Name == "lstat") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "lchown") {
+      if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'q':
+    if (Name == "qsort") {
+      if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+        return;
+      // May throw; places call through function pointer.
+      setDoesNotCapture(F, 4);
+    }
+    break;
+  case '_':
+    if (Name == "__strdup" ||
+        Name == "__strndup") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "__strtok_r") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "_IO_getc") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "_IO_putc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 1:
+    if (Name == "\1__isoc99_scanf") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "\1stat64" ||
+               Name == "\1lstat64" ||
+               Name == "\1statvfs64" ||
+               Name == "\1__isoc99_sscanf") {
+      if (FTy->getNumParams() < 2 ||  // params 0 and 1 are both read below
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1fopen64") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1fseeko64" ||
+               Name == "\1ftello64") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "\1tmpfile64") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "\1fstat64" ||
+               Name == "\1fstatvfs64") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1open64") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "open" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  }
+}
+
 /// doInitialization - Add attributes to well-known functions.
 ///
 bool SimplifyLibCalls::doInitialization(Module &M) {
   Modified = false;
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     Function &F = *I;
-    if (!F.isDeclaration())
-      continue;
-
-    if (!F.hasName())
-      continue;
-
-    const FunctionType *FTy = F.getFunctionType();
-
-    StringRef Name = F.getName();
-    switch (Name[0]) {
-      case 's':
-        if (Name == "strlen") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "strchr" ||
-                   Name == "strrchr") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isIntegerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-        } else if (Name == "strcpy" ||
-                   Name == "stpcpy" ||
-                   Name == "strcat" ||
-                   Name == "strtol" ||
-                   Name == "strtod" ||
-                   Name == "strtof" ||
-                   Name == "strtoul" ||
-                   Name == "strtoll" ||
-                   Name == "strtold" ||
-                   Name == "strncat" ||
-                   Name == "strncpy" ||
-                   Name == "strtoull") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "strxfrm") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "strcmp" ||
-                   Name == "strspn" ||
-                   Name == "strncmp" ||
-                   Name == "strcspn" ||
-                   Name == "strcoll" ||
-                   Name == "strcasecmp" ||
-                   Name == "strncasecmp") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "strstr" ||
-                   Name == "strpbrk") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "strtok" ||
-                   Name == "strtok_r") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "scanf" ||
-                   Name == "setbuf" ||
-                   Name == "setvbuf") {
-          if (FTy->getNumParams() < 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "strdup" ||
-                   Name == "strndup") {
-          if (FTy->getNumParams() < 1 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "stat" ||
-                   Name == "sscanf" ||
-                   Name == "sprintf" ||
-                   Name == "statvfs") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "snprintf") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(2)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 3);
-        } else if (Name == "setitimer") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(1)->isPointerTy() ||
-              !FTy->getParamType(2)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-          setDoesNotCapture(F, 3);
-        } else if (Name == "system") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          // May throw; "system" is a valid pthread cancellation point.
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'm':
-        if (Name == "malloc") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-        } else if (Name == "memcmp") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "memchr" ||
-                   Name == "memrchr") {
-          if (FTy->getNumParams() != 3)
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-        } else if (Name == "modf" ||
-                   Name == "modff" ||
-                   Name == "modfl" ||
-                   Name == "memcpy" ||
-                   Name == "memccpy" ||
-                   Name == "memmove") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "memalign") {
-          if (!FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotAlias(F, 0);
-        } else if (Name == "mkdir" ||
-                   Name == "mktime") {
-          if (FTy->getNumParams() == 0 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'r':
-        if (Name == "realloc") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "read") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          // May throw; "read" is a valid pthread cancellation point.
-          setDoesNotCapture(F, 2);
-        } else if (Name == "rmdir" ||
-                   Name == "rewind" ||
-                   Name == "remove" ||
-                   Name == "realpath") {
-          if (FTy->getNumParams() < 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "rename" ||
-                   Name == "readlink") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        }
-        break;
-      case 'w':
-        if (Name == "write") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          // May throw; "write" is a valid pthread cancellation point.
-          setDoesNotCapture(F, 2);
-        }
-        break;
-      case 'b':
-        if (Name == "bcopy") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "bcmp") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setOnlyReadsMemory(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "bzero") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'c':
-        if (Name == "calloc") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-        } else if (Name == "chmod" ||
-                   Name == "chown" ||
-                   Name == "ctermid" ||
-                   Name == "clearerr" ||
-                   Name == "closedir") {
-          if (FTy->getNumParams() == 0 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'a':
-        if (Name == "atoi" ||
-            Name == "atol" ||
-            Name == "atof" ||
-            Name == "atoll") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setOnlyReadsMemory(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "access") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'f':
-        if (Name == "fopen") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "fdopen") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "feof" ||
-                   Name == "free" ||
-                   Name == "fseek" ||
-                   Name == "ftell" ||
-                   Name == "fgetc" ||
-                   Name == "fseeko" ||
-                   Name == "ftello" ||
-                   Name == "fileno" ||
-                   Name == "fflush" ||
-                   Name == "fclose" ||
-                   Name == "fsetpos" ||
-                   Name == "flockfile" ||
-                   Name == "funlockfile" ||
-                   Name == "ftrylockfile") {
-          if (FTy->getNumParams() == 0 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "ferror") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setOnlyReadsMemory(F);
-        } else if (Name == "fputc" ||
-                   Name == "fstat" ||
-                   Name == "frexp" ||
-                   Name == "frexpf" ||
-                   Name == "frexpl" ||
-                   Name == "fstatvfs") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "fgets") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(2)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 3);
-        } else if (Name == "fread" ||
-                   Name == "fwrite") {
-          if (FTy->getNumParams() != 4 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(3)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 4);
-        } else if (Name == "fputs" ||
-                   Name == "fscanf" ||
-                   Name == "fprintf" ||
-                   Name == "fgetpos") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        }
-        break;
-      case 'g':
-        if (Name == "getc" ||
-            Name == "getlogin_r" ||
-            Name == "getc_unlocked") {
-          if (FTy->getNumParams() == 0 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "getenv") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setOnlyReadsMemory(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "gets" ||
-                   Name == "getchar") {
-          setDoesNotThrow(F);
-        } else if (Name == "getitimer") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "getpwnam") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'u':
-        if (Name == "ungetc") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "uname" ||
-                   Name == "unlink" ||
-                   Name == "unsetenv") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "utime" ||
-                   Name == "utimes") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        }
-        break;
-      case 'p':
-        if (Name == "putc") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "puts" ||
-                   Name == "printf" ||
-                   Name == "perror") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "pread" ||
-                   Name == "pwrite") {
-          if (FTy->getNumParams() != 4 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          // May throw; these are valid pthread cancellation points.
-          setDoesNotCapture(F, 2);
-        } else if (Name == "putchar") {
-          setDoesNotThrow(F);
-        } else if (Name == "popen") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "pclose") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'v':
-        if (Name == "vscanf") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "vsscanf" ||
-                   Name == "vfscanf") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(1)->isPointerTy() ||
-              !FTy->getParamType(2)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "valloc") {
-          if (!FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-        } else if (Name == "vprintf") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "vfprintf" ||
-                   Name == "vsprintf") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "vsnprintf") {
-          if (FTy->getNumParams() != 4 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(2)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 3);
-        }
-        break;
-      case 'o':
-        if (Name == "open") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          // May throw; "open" is a valid pthread cancellation point.
-          setDoesNotCapture(F, 1);
-        } else if (Name == "opendir") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 't':
-        if (Name == "tmpfile") {
-          if (!FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-        } else if (Name == "times") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'h':
-        if (Name == "htonl" ||
-            Name == "htons") {
-          setDoesNotThrow(F);
-          setDoesNotAccessMemory(F);
-        }
-        break;
-      case 'n':
-        if (Name == "ntohl" ||
-            Name == "ntohs") {
-          setDoesNotThrow(F);
-          setDoesNotAccessMemory(F);
-        }
-        break;
-      case 'l':
-        if (Name == "lstat") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "lchown") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        }
-        break;
-      case 'q':
-        if (Name == "qsort") {
-          if (FTy->getNumParams() != 4 ||
-              !FTy->getParamType(3)->isPointerTy())
-            continue;
-          // May throw; places call through function pointer.
-          setDoesNotCapture(F, 4);
-        }
-        break;
-      case '_':
-        if (Name == "__strdup" ||
-            Name == "__strndup") {
-          if (FTy->getNumParams() < 1 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "__strtok_r") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "_IO_getc") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "_IO_putc") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        }
-        break;
-      case 1:
-        if (Name == "\1__isoc99_scanf") {
-          if (FTy->getNumParams() < 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "\1stat64" ||
-                   Name == "\1lstat64" ||
-                   Name == "\1statvfs64" ||
-                   Name == "\1__isoc99_sscanf") {
-          if (FTy->getNumParams() < 1 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "\1fopen64") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getReturnType()->isPointerTy() ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "\1fseeko64" ||
-                   Name == "\1ftello64") {
-          if (FTy->getNumParams() == 0 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "\1tmpfile64") {
-          if (!FTy->getReturnType()->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotAlias(F, 0);
-        } else if (Name == "\1fstat64" ||
-                   Name == "\1fstatvfs64") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "\1open64") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          // May throw; "open" is a valid pthread cancellation point.
-          setDoesNotCapture(F, 1);
-        }
-        break;
-    }
+    if (F.isDeclaration() && F.hasName())
+      inferPrototypeAttributes(F);
   }
   return Modified;
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 063c76e9522c..3f789fa86589 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -262,12 +262,13 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
 
 /// areAllUsesEqual - Check whether the uses of a value are all the same.
 /// This is similar to Instruction::hasOneUse() except this will also return
-/// true when there are multiple uses that all refer to the same value.
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
 static bool areAllUsesEqual(Instruction *I) {
   Value::use_iterator UI = I->use_begin();
   Value::use_iterator UE = I->use_end();
   if (UI == UE)
-    return false;
+    return true;
 
   User *TheUse = *UI;
   for (++UI; UI != UE; ++UI) {
@@ -281,31 +282,24 @@ static bool areAllUsesEqual(Instruction *I) {
 /// dead PHI node, due to being a def-use chain of single-use nodes that
 /// either forms a cycle or is terminated by a trivially dead instruction,
 /// delete it.  If that makes any of its operands trivially dead, delete them
-/// too, recursively.  Return true if the PHI node is actually deleted.
+/// too, recursively.  Return true if a change was made.
 bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
-  // We can remove a PHI if it is on a cycle in the def-use graph
-  // where each node in the cycle has degree one, i.e. only one use,
-  // and is an instruction with no side effects.
-  if (!areAllUsesEqual(PN))
-    return false;
+  SmallPtrSet<Instruction*, 4> Visited;
+  for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+       I = cast<Instruction>(*I->use_begin())) {
+    if (I->use_empty())
+      return RecursivelyDeleteTriviallyDeadInstructions(I);
 
-  bool Changed = false;
-  SmallPtrSet<PHINode *, 4> PHIs;
-  PHIs.insert(PN);
-  for (Instruction *J = cast<Instruction>(*PN->use_begin());
-       areAllUsesEqual(J) && !J->mayHaveSideEffects();
-       J = cast<Instruction>(*J->use_begin()))
-    // If we find a PHI more than once, we're on a cycle that
+    // If we find an instruction more than once, we're on a cycle that
     // won't prove fruitful.
-    if (PHINode *JP = dyn_cast<PHINode>(J))
-      if (!PHIs.insert(JP)) {
-        // Break the cycle and delete the PHI and its operands.
-        JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
-        (void)RecursivelyDeleteTriviallyDeadInstructions(JP);
-        Changed = true;
-        break;
-      }
-  return Changed;
+    if (!Visited.insert(I)) {
+      // Break the cycle and delete the instruction and its operands.
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
+      (void)RecursivelyDeleteTriviallyDeadInstructions(I);
+      return true;
+    }
+  }
+  return false;
 }
 
 /// SimplifyInstructionsInBlock - Scan the specified basic block and try to
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e6a4373c495b..778885723e66 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Metadata.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/ADT/DenseMap.h"
@@ -190,7 +191,7 @@ namespace {
     ///
     std::vector<AllocaInst*> Allocas;
     DominatorTree &DT;
-    DIFactory *DIF;
+    DIBuilder *DIB;
 
     /// AST - An AliasSetTracker object to update.  If null, don't update it.
     ///
@@ -235,9 +236,9 @@ namespace {
   public:
     PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                    AliasSetTracker *ast)
-      : Allocas(A), DT(dt), DIF(0), AST(ast) {}
+      : Allocas(A), DT(dt), DIB(0), AST(ast) {}
     ~PromoteMem2Reg() {
-      delete DIF;
+      delete DIB;
     }
 
     void run();
@@ -951,9 +952,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (!DIVar.Verify())
     return;
 
-  if (!DIF)
-    DIF = new DIFactory(*SI->getParent()->getParent()->getParent());
-  Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0,
+  if (!DIB)
+    DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+  Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
                                                      DIVar, SI);
   
   // Propagate any debug metadata from the store onto the dbg.value.
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index fb660dbfac10..c6708857cb56 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -247,6 +247,11 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
     if (PBB->getFirstNonPHIOrDbg() != I)
       return false;
     break;
+  case Instruction::GetElementPtr:
+    // GEPs are cheap if all indices are constant.
+    if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
+      return false;
+    break;
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::And:
-- 
cgit v1.2.3