From d0e4e96dc17a6c1c6de3340842c80f0e187ba349 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 26 Feb 2011 22:03:50 +0000 Subject: Vendor import of llvm trunk r126547: http://llvm.org/svn/llvm-project/llvm/trunk@126547 --- lib/Analysis/DIBuilder.cpp | 154 +-- lib/Analysis/InstructionSimplify.cpp | 10 + lib/CodeGen/AllocationOrder.h | 2 + lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 10 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9 +- lib/CodeGen/BranchFolding.cpp | 6 +- lib/CodeGen/InlineSpiller.cpp | 20 +- lib/CodeGen/LowerSubregs.cpp | 10 +- lib/CodeGen/MachineFunction.cpp | 12 +- lib/CodeGen/MachineRegisterInfo.cpp | 9 +- lib/CodeGen/RegAllocBase.h | 15 +- lib/CodeGen/RegAllocBasic.cpp | 60 +- lib/CodeGen/RegAllocGreedy.cpp | 197 ++- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 133 +- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 118 ++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 49 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 10 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 31 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 55 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 26 +- lib/CodeGen/SplitKit.cpp | 18 +- lib/CodeGen/SplitKit.h | 7 + lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 30 +- lib/CodeGen/VirtRegRewriter.cpp | 6 +- lib/MC/ELFObjectWriter.cpp | 10 +- lib/MC/MCAsmInfo.cpp | 1 + lib/MC/MCAsmInfoDarwin.cpp | 1 + lib/MC/MCDisassembler/EDOperand.cpp | 20 +- lib/MC/MCDisassembler/EDToken.cpp | 4 + lib/MC/MCObjectStreamer.cpp | 18 +- lib/MC/MCParser/AsmParser.cpp | 2 + lib/MC/MCParser/ELFAsmParser.cpp | 12 +- lib/MC/MCParser/MCAsmParserExtension.cpp | 3 +- lib/MC/MCSectionMachO.cpp | 17 +- lib/MC/MCStreamer.cpp | 4 +- lib/Support/APInt.cpp | 2 +- lib/Target/ARM/ARMBaseInstrInfo.h | 3 +- lib/Target/ARM/ARMFastISel.cpp | 38 +- lib/Target/ARM/ARMFrameLowering.cpp | 16 +- lib/Target/ARM/ARMHazardRecognizer.cpp | 17 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 + lib/Target/ARM/ARMISelLowering.cpp | 102 +- lib/Target/ARM/ARMInstrFormats.td | 7 +- lib/Target/ARM/ARMInstrInfo.td | 4 +- lib/Target/ARM/ARMInstrNEON.td | 31 +- lib/Target/ARM/ARMInstrVFP.td | 162 +-- lib/Target/ARM/ARMSubtarget.cpp | 4 +- lib/Target/ARM/MLxExpansionPass.cpp | 20 +- lib/Target/ARM/NEONMoveFix.cpp | 9 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 6 + lib/Target/Alpha/AlphaISelLowering.cpp | 1 - lib/Target/Alpha/AlphaISelLowering.h | 14 +- lib/Target/Blackfin/BlackfinISelLowering.cpp | 1 - lib/Target/Blackfin/BlackfinISelLowering.h | 1 + lib/Target/CellSPU/SPUISelLowering.cpp | 7 +- lib/Target/CellSPU/SPUISelLowering.h | 6 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 4 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 6 +- lib/Target/MSP430/MSP430ISelLowering.h | 2 + lib/Target/PowerPC/PPCISelLowering.cpp | 25 +- lib/Target/PowerPC/PPCISelLowering.h | 94 +- lib/Target/README.txt | 24 + lib/Target/Sparc/DelaySlotFiller.cpp | 31 + lib/Target/Sparc/SparcISelLowering.cpp | 43 +- lib/Target/Sparc/SparcISelLowering.h | 2 + lib/Target/Sparc/SparcInstrInfo.td | 18 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 3 - lib/Target/SystemZ/SystemZISelLowering.h | 2 + lib/Target/X86/AsmParser/X86AsmParser.cpp | 13 + lib/Target/X86/Disassembler/X86Disassembler.cpp | 8 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 2 +- lib/Target/X86/README.txt | 82 +- lib/Target/X86/X86FastISel.cpp | 8 +- lib/Target/X86/X86ISelLowering.cpp | 50 +- lib/Target/X86/X86ISelLowering.h | 18 +- lib/Target/X86/X86InstrFormats.td | 2 + lib/Target/X86/X86InstrInfo.cpp | 4 +- lib/Target/X86/X86InstrInfo.h | 4 + lib/Target/X86/X86InstrInfo.td | 3 + lib/Target/X86/X86InstrSystem.td | 5 + lib/Target/X86/X86MCCodeEmitter.cpp | 8 + lib/Target/X86/X86Subtarget.cpp | 7 +- lib/Target/X86/X86Subtarget.h | 2 + lib/Target/XCore/XCoreISelLowering.cpp | 125 +- lib/Target/XCore/XCoreISelLowering.h | 23 +- lib/Target/XCore/XCoreInstrInfo.td | 55 +- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 33 + lib/Transforms/InstCombine/InstCombineCalls.cpp | 20 +- lib/Transforms/Scalar/LoopDeletion.cpp | 23 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 18 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 1343 ++++++++++---------- lib/Transforms/Utils/Local.cpp | 42 +- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 13 +- lib/Transforms/Utils/SimplifyCFG.cpp | 5 + 96 files changed, 2250 insertions(+), 1496 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index c1072df72925..590a9c17a8fa 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -31,9 +31,9 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { DIBuilder::DIBuilder(Module &m) : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {} -/// CreateCompileUnit - A CompileUnit provides an anchor for all debugging +/// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. -void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, +void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RunTimeVer) { @@ -53,9 +53,9 @@ void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateFile - Create a file descriptor to hold debugging information +/// createFile - Create a file descriptor to hold debugging information /// for a file. -DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) { +DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_file_type), @@ -66,8 +66,8 @@ DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) { return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateEnumerator - Create a single enumerator value. -DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) { +/// createEnumerator - Create a single enumerator value. +DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), MDString::get(VMContext, Name), @@ -76,9 +76,9 @@ DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) { return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateBasicType - Create debugging information entry for a basic +/// createBasicType - Create debugging information entry for a basic /// type, e.g 'char'. -DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, +DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding) { // Basic types are encoded in DIBasicType format. Line number, filename, @@ -98,9 +98,9 @@ DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateQaulifiedType - Create debugging information entry for a qualified +/// createQaulifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. -DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) { +DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), @@ -117,8 +117,8 @@ DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreatePointerType - Create debugging information entry for a pointer. -DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits, +/// createPointerType - Create debugging information entry for a pointer. +DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, uint64_t AlignInBits, StringRef Name) { // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { @@ -136,8 +136,8 @@ DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateReferenceType - Create debugging information entry for a reference. -DIType DIBuilder::CreateReferenceType(DIType RTy) { +/// createReferenceType - Create debugging information entry for a reference. +DIType DIBuilder::createReferenceType(DIType RTy) { // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), @@ -154,8 +154,8 @@ DIType DIBuilder::CreateReferenceType(DIType RTy) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateTypedef - Create debugging information entry for a typedef. -DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File, +/// createTypedef - Create debugging information entry for a typedef. +DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, unsigned LineNo) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid typedef type!"); @@ -174,8 +174,8 @@ DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateFriend - Create debugging information entry for a 'friend'. -DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) { +/// createFriend - Create debugging information entry for a 'friend'. +DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid type!"); assert(FriendTy.Verify() && "Invalid friend type!"); @@ -194,9 +194,9 @@ DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateInheritance - Create debugging information entry to establish +/// createInheritance - Create debugging information entry to establish /// inheritnace relationship between two types. -DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, +DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { @@ -214,8 +214,8 @@ DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateMemberType - Create debugging information entry for a member. -DIType DIBuilder::CreateMemberType(StringRef Name, +/// createMemberType - Create debugging information entry for a member. +DIType DIBuilder::createMemberType(StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, @@ -236,8 +236,8 @@ DIType DIBuilder::CreateMemberType(StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateClassType - Create debugging information entry for a class. -DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, +/// createClassType - Create debugging information entry for a class. +DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, @@ -263,10 +263,10 @@ DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateTemplateTypeParameter - Create debugging information for template +/// createTemplateTypeParameter - Create debugging information for template /// type parameter. DITemplateTypeParameter -DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name, +DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, DIType Ty, MDNode *File, unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { @@ -282,10 +282,10 @@ DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name, array_lengthof(Elts))); } -/// CreateTemplateValueParameter - Create debugging information for template +/// createTemplateValueParameter - Create debugging information for template /// value parameter. DITemplateValueParameter -DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name, +DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, DIType Ty, uint64_t Val, MDNode *File, unsigned LineNo, unsigned ColumnNo) { @@ -303,8 +303,8 @@ DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name, array_lengthof(Elts))); } -/// CreateStructType - Create debugging information entry for a struct. -DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, +/// createStructType - Create debugging information entry for a struct. +DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, @@ -328,8 +328,8 @@ DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateUnionType - Create debugging information entry for an union. -DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, +/// createUnionType - Create debugging information entry for an union. +DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, @@ -353,8 +353,8 @@ DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateSubroutineType - Create subroutine type. -DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) { +/// createSubroutineType - Create subroutine type. +DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { // TAG_subroutine_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), @@ -374,9 +374,9 @@ DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateEnumerationType - Create debugging information entry for an +/// createEnumerationType - Create debugging information entry for an /// enumeration. -DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, +DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements) { @@ -402,8 +402,8 @@ DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, return DIType(Node); } -/// CreateArrayType - Create debugging information entry for an array. -DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, +/// createArrayType - Create debugging information entry for an array. +DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { @@ -424,8 +424,8 @@ DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateVectorType - Create debugging information entry for a vector. -DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, +/// createVectorType - Create debugging information entry for a vector. +DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_vector_type is encoded in DICompositeType format. Value *Elts[] = { @@ -446,8 +446,8 @@ DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateArtificialType - Create a new DIType with "artificial" flag set. -DIType DIBuilder::CreateArtificialType(DIType Ty) { +/// createArtificialType - Create a new DIType with "artificial" flag set. +DIType DIBuilder::createArtificialType(DIType Ty) { if (Ty.isArtificial()) return Ty; @@ -470,24 +470,24 @@ DIType DIBuilder::CreateArtificialType(DIType Ty) { return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); } -/// RetainType - Retain DIType in a module even if it is not referenced +/// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. -void DIBuilder::RetainType(DIType T) { +void DIBuilder::retainType(DIType T) { NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); NMD->addOperand(T); } -/// CreateUnspecifiedParameter - Create unspeicified type descriptor +/// createUnspecifiedParameter - Create unspeicified type descriptor /// for the subroutine type. -DIDescriptor DIBuilder::CreateUnspecifiedParameter() { +DIDescriptor DIBuilder::createUnspecifiedParameter() { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); } -/// CreateTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::CreateTemporaryType() { +/// createTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::createTemporaryType() { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; @@ -495,8 +495,8 @@ DIType DIBuilder::CreateTemporaryType() { return DIType(Node); } -/// CreateTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::CreateTemporaryType(DIFile F) { +/// createTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::createTemporaryType(DIFile F) { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it. Value *Elts[] = { @@ -509,8 +509,8 @@ DIType DIBuilder::CreateTemporaryType(DIFile F) { return DIType(Node); } -/// GetOrCreateArray - Get a DIArray, create one if required. -DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements) { +/// getOrCreateArray - Get a DIArray, create one if required. +DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) { if (NumElements == 0) { Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); return DIArray(MDNode::get(VMContext, &Null, 1)); @@ -518,9 +518,9 @@ DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements return DIArray(MDNode::get(VMContext, Elements, NumElements)); } -/// GetOrCreateSubrange - Create a descriptor for a value range. This +/// getOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. -DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { +DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type), ConstantInt::get(Type::getInt64Ty(VMContext), Lo), @@ -530,9 +530,9 @@ DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); } -/// CreateGlobalVariable - Create a new descriptor for the specified global. +/// createGlobalVariable - Create a new descriptor for the specified global. DIGlobalVariable DIBuilder:: -CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), @@ -555,10 +555,10 @@ CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, return DIGlobalVariable(Node); } -/// CreateStaticVariable - Create a new descriptor for the specified static +/// createStaticVariable - Create a new descriptor for the specified static /// variable. DIGlobalVariable DIBuilder:: -CreateStaticVariable(DIDescriptor Context, StringRef Name, +createStaticVariable(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { @@ -582,8 +582,8 @@ CreateStaticVariable(DIDescriptor Context, StringRef Name, return DIGlobalVariable(Node); } -/// CreateVariable - Create a new descriptor for the specified variable. -DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope, +/// createVariable - Create a new descriptor for the specified variable. +DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool AlwaysPreserve, unsigned Flags) { @@ -614,9 +614,9 @@ DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope, return DIVariable(Node); } -/// CreateComplexVariable - Create a new descriptor for the specified variable +/// createComplexVariable - Create a new descriptor for the specified variable /// which has a complex address expression for its address. -DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope, +DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, DIType Ty, Value *const *Addr, @@ -633,8 +633,8 @@ DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope, return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size())); } -/// CreateFunction - Create a new descriptor for the specified function. -DISubprogram DIBuilder::CreateFunction(DIDescriptor Context, +/// createFunction - Create a new descriptor for the specified function. +DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, @@ -670,8 +670,8 @@ DISubprogram DIBuilder::CreateFunction(DIDescriptor Context, return DISubprogram(Node); } -/// CreateMethod - Create a new descriptor for the specified C++ method. -DISubprogram DIBuilder::CreateMethod(DIDescriptor Context, +/// createMethod - Create a new descriptor for the specified C++ method. +DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, @@ -710,9 +710,9 @@ DISubprogram DIBuilder::CreateMethod(DIDescriptor Context, return DISubprogram(Node); } -/// CreateNameSpace - This creates new descriptor for a namespace +/// createNameSpace - This creates new descriptor for a namespace /// with the specified parent scope. -DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name, +DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_namespace), @@ -724,7 +724,7 @@ DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name, return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File, +DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col) { // Defeat MDNode uniqing for lexical blocks by using unique id. static unsigned int unique_id = 0; @@ -739,8 +739,8 @@ DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File, return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, Instruction *InsertBefore) { assert(Storage && "no storage passed to dbg.declare"); assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); @@ -751,8 +751,8 @@ Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } -/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, BasicBlock *InsertAtEnd) { assert(Storage && "no storage passed to dbg.declare"); assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); @@ -769,8 +769,8 @@ Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); } -/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable VarInfo, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); @@ -784,8 +784,8 @@ Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); } -/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable VarInfo, BasicBlock *InsertAtEnd) { assert(V && "no value passed to dbg.value"); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a2f9862383fd..982dacb50bfc 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1161,6 +1161,16 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, (A == Op0 || B == Op0)) return Op0; + // ~(A & ?) | A = -1 + if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op1 || B == Op1)) + return Constant::getAllOnesValue(Op1->getType()); + + // A | ~(A & ?) = -1 + if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op0 || B == Op0)) + return Constant::getAllOnesValue(Op0->getType()); + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, MaxRecurse)) diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 3db4b6925fca..61fd8f881a8c 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -47,6 +47,8 @@ public: /// rewind - Start over from the beginning. void rewind() { Pos = 0; } + /// isHint - Return true if PhysReg is a preferred register. + bool isHint(unsigned PhysReg) const { return PhysReg == Hint; } }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 43e8990a9da1..9cb882e6a1bb 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -764,7 +764,7 @@ bool AsmPrinter::doFinalization(Module &M) { continue; MCSymbol *Name = Mang->getSymbol(&F); - EmitVisibility(Name, V); + EmitVisibility(Name, V, false); } // Finalize debug and EH information. @@ -1820,13 +1820,17 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { } } -void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const { +void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, + bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; switch (Visibility) { default: break; case GlobalValue::HiddenVisibility: - Attr = MAI->getHiddenVisibilityAttr(); + if (IsDefinition) + Attr = MAI->getHiddenVisibilityAttr(); + else + Attr = MAI->getHiddenDeclarationVisibilityAttr(); break; case GlobalValue::ProtectedVisibility: Attr = MAI->getProtectedVisibilityAttr(); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5106d5778c29..780fa405ef51 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -644,12 +645,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { uint64_t Element = DV->getAddrElement(i); - if (Element == DIFactory::OpPlus) { + if (Element == DIBuilder::OpPlus) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIFactory::OpDeref) { + } else if (Element == DIBuilder::OpDeref) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIFactory Opcode"); + } else llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. @@ -1894,7 +1895,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, + addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index cb81aa3c88ce..78a87431feaa 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -501,10 +501,11 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); - MachineFunction *MF = MBB1->getParent(); - if (CommonTailLen == 0) return false; + DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() + << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + << '\n'); // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. @@ -541,6 +542,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // we don't have to split a block. At worst we will be introducing 1 new // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. + MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index a1bd972d38e2..38e6c8590269 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -102,8 +102,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass, } } -/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of -/// reloading it. +/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); @@ -346,7 +345,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { && "Trying to spill a stack slot."); DEBUG(dbgs() << "Inline spilling " << mri_.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\n"); + << ':' << edit.getParent() << "\nFrom original " + << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); @@ -357,12 +357,20 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { return; rc_ = mri_.getRegClass(edit.getReg()); - stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg()); + + // Share a stack slot among all descendants of Orig. + unsigned Orig = vrm_.getOriginal(edit.getReg()); + stackSlot_ = vrm_.getStackSlot(Orig); + if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) + stackSlot_ = vrm_.assignVirt2StackSlot(Orig); + + if (Orig != edit.getReg()) + vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_); // Update LiveStacks now that we are committed to spilling. LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); - assert(stacklvr.empty() && "Just created stack slot not empty"); - stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); + if (!stacklvr.hasAtLeastOneValue()) + stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); // Iterate over instructions using register. diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index ad1c537c1911..7871ba9c17e4 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -37,7 +37,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} - + const char *getPassName() const { return "Subregister lowering instruction pass"; } @@ -64,8 +64,8 @@ namespace { char LowerSubregsInstructionPass::ID = 0; } -FunctionPass *llvm::createLowerSubregsPass() { - return new LowerSubregsInstructionPass(); +FunctionPass *llvm::createLowerSubregsPass() { + return new LowerSubregsInstructionPass(); } /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, @@ -192,9 +192,9 @@ bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) { /// copies. /// bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n" + DEBUG(dbgs() << "Machine Function\n" << "********** LOWERING SUBREG INSTRS **********\n" - << "********** Function: " + << "********** Function: " << MF.getFunction()->getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 85532407ca43..d81e4a1d015f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -396,8 +396,7 @@ void MachineFunction::viewCFGOnly() const /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, - const TargetRegisterClass *RC, - DebugLoc DL) { + const TargetRegisterClass *RC) { MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { @@ -406,7 +405,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, } VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(PReg, VReg); - MRI.addLiveInLoc(VReg, DL); return VReg; } @@ -646,6 +644,10 @@ MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) delete Constants[i].Val.MachineCPVal; + for (DenseSet::iterator I = + MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end(); + I != E; ++I) + delete *I; } /// CanShareConstantPoolEntry - Test whether the given two constants @@ -723,8 +725,10 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, // // FIXME, this could be made much more efficient for large constant pools. int Idx = V->getExistingMachineCPValue(this, Alignment); - if (Idx != -1) + if (Idx != -1) { + MachineCPVsSharingEntries.insert(V); return (unsigned)Idx; + } Constants.push_back(MachineConstantPoolEntry(V, Alignment)); return Constants.size()-1; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index b3fb33736ffc..7244d5f03a90 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -210,15 +210,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LiveIns.erase(LiveIns.begin() + i); --i; --e; } else { - DebugLoc DL; - // If there is a location for this live in then use it. - DenseMap::iterator DLI = - LiveInLocs.find(LiveIns[i].second); - if (DLI != LiveInLocs.end()) - DL = DLI->second; - // Emit a copy. - BuildMI(*EntryMBB, EntryMBB->begin(), DL, + BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), TII.get(TargetOpcode::COPY), LiveIns[i].second) .addReg(LiveIns[i].first); diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 8c7e5f53b824..5af0ce79acf7 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -39,7 +39,6 @@ #include "llvm/ADT/OwningPtr.h" #include "LiveIntervalUnion.h" -#include namespace llvm { @@ -58,8 +57,8 @@ class LiveVirtRegQueue; /// be extended to add interesting heuristics. /// /// Register allocators must override the selectOrSplit() method to implement -/// live range splitting. They may also override getPriority() which otherwise -/// defaults to the spill weight computed by CalculateSpillWeights. +/// live range splitting. They must also override enqueue/dequeue to provide an +/// assignment order. class RegAllocBase { LiveIntervalUnion::Allocator UnionAllocator; protected: @@ -120,9 +119,11 @@ protected: // Get a temporary reference to a Spiller instance. virtual Spiller &spiller() = 0; - // getPriority - Calculate the allocation priority for VirtReg. - // Virtual registers with higher priorities are allocated first. - virtual float getPriority(LiveInterval *LI) = 0; + /// enqueue - Add VirtReg to the priority queue of unassigned registers. + virtual void enqueue(LiveInterval *LI) = 0; + + /// dequeue - Return the next unassigned register, or NULL. + virtual LiveInterval *dequeue() = 0; // A RegAlloc pass should override this to provide the allocation heuristics. // Each call must guarantee forward progess by returning an available PhysReg @@ -170,7 +171,7 @@ public: static bool VerifyEnabled; private: - void seedLiveVirtRegs(std::priority_queue >&); + void seedLiveRegs(); void spillReg(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl &SplitVRegs); diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 045c8db9dadb..6923908a32d9 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/Timer.h" #include +#include using namespace llvm; @@ -64,6 +65,14 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), const char *RegAllocBase::TimerGroupName = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; +namespace { + struct CompSpillWeight { + bool operator()(LiveInterval *A, LiveInterval *B) const { + return A->weight < B->weight; + } + }; +} + namespace { /// RABasic provides a minimal implementation of the basic register allocation /// algorithm. It prioritizes live virtual registers by spill weight and spills @@ -82,7 +91,8 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // state std::auto_ptr SpillerInstance; - + std::priority_queue, + CompSpillWeight> Queue; public: RABasic(); @@ -100,6 +110,18 @@ public: virtual float getPriority(LiveInterval *LI) { return LI->weight; } + virtual void enqueue(LiveInterval *LI) { + Queue.push(LI); + } + + virtual LiveInterval *dequeue() { + if (Queue.empty()) + return 0; + LiveInterval *LI = Queue.top(); + Queue.pop(); + return LI; + } + virtual unsigned selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &SplitVRegs); @@ -227,18 +249,17 @@ void RegAllocBase::releaseMemory() { PhysReg2LiveUnion.clear(); } -// Visit all the live virtual registers. If they are already assigned to a -// physical register, unify them with the corresponding LiveIntervalUnion, -// otherwise push them on the priority queue for later assignment. -void RegAllocBase:: -seedLiveVirtRegs(std::priority_queue > &VirtRegQ) { +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. +void RegAllocBase::seedLiveRegs() { for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; if (TargetRegisterInfo::isPhysicalRegister(RegNum)) PhysReg2LiveUnion[RegNum].unify(VirtReg); else - VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum)); + enqueue(&VirtReg); } } @@ -263,38 +284,31 @@ void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { // Top-level driver to manage the queue of unassigned VirtRegs and call the // selectOrSplit implementation. void RegAllocBase::allocatePhysRegs() { - - // Push each vreg onto a queue or "precolor" by adding it to a physreg union. - std::priority_queue > VirtRegQ; - seedLiveVirtRegs(VirtRegQ); + seedLiveRegs(); // Continue assigning vregs one at a time to available physical registers. - while (!VirtRegQ.empty()) { - // Pop the highest priority vreg. - LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second); - VirtRegQ.pop(); - + while (LiveInterval *VirtReg = dequeue()) { // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. - DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName() - << ':' << VirtReg << '\n'); + DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << '\n'); typedef SmallVector VirtRegVec; VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs); + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg) - assign(VirtReg, AvailablePhysReg); + assign(*VirtReg, AvailablePhysReg); for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { - LiveInterval* SplitVirtReg = *I; + LiveInterval *SplitVirtReg = *I; if (SplitVirtReg->empty()) continue; DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); - VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg), - SplitVirtReg->reg)); + enqueue(SplitVirtReg); ++NumNewQueued; } } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index c1372cd038cf..406485aaf496 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -43,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include + using namespace llvm; STATISTIC(NumGlobalSplits, "Number of split global live ranges"); @@ -71,6 +73,8 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { // state std::auto_ptr SpillerInstance; std::auto_ptr SA; + std::priority_queue > Queue; + IndexedMap Generation; // splitting state. @@ -91,13 +95,10 @@ public: /// RAGreedy analysis usage. virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual Spiller &spiller() { return *SpillerInstance; } - - virtual float getPriority(LiveInterval *LI); - + virtual void enqueue(LiveInterval *LI); + virtual LiveInterval *dequeue(); virtual unsigned selectOrSplit(LiveInterval&, SmallVectorImpl&); @@ -119,9 +120,12 @@ private: SlotIndex getPrevMappedIndex(const MachineInstr*); void calcPrevSlots(); unsigned nextSplitPoint(unsigned); + bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&); - unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&, + unsigned tryReassign(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + unsigned tryEvict(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, @@ -183,25 +187,42 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { void RAGreedy::releaseMemory() { SpillerInstance.reset(0); + Generation.clear(); RegAllocBase::releaseMemory(); } -float RAGreedy::getPriority(LiveInterval *LI) { - float Priority = LI->weight; - - // Prioritize hinted registers so they are allocated first. - std::pair Hint; - if (Hint.first || Hint.second) { - // The hint can be target specific, a virtual register, or a physreg. - Priority *= 2; - - // Prefer physreg hints above anything else. - if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second)) - Priority *= 2; - } - return Priority; +void RAGreedy::enqueue(LiveInterval *LI) { + // Prioritize live ranges by size, assigning larger ranges first. + // The queue holds (size, reg) pairs. + const unsigned Size = LI->getSize(); + const unsigned Reg = LI->reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Can only enqueue virtual registers"); + const unsigned Hint = VRM->getRegAllocPref(Reg); + unsigned Prio; + + Generation.grow(Reg); + if (++Generation[Reg] == 1) + // 1st generation ranges are handled first, long -> short. + Prio = (1u << 31) + Size; + else + // Repeat offenders are handled second, short -> long + Prio = (1u << 30) - Size; + + // Boost ranges that have a physical register hint. + if (TargetRegisterInfo::isPhysicalRegister(Hint)) + Prio |= (1u << 30); + + Queue.push(std::make_pair(Prio, Reg)); } +LiveInterval *RAGreedy::dequeue() { + if (Queue.empty()) + return 0; + LiveInterval *LI = &LIS->getInterval(Queue.top().second); + Queue.pop(); + return LI; +} //===----------------------------------------------------------------------===// // Register Reassignment @@ -230,8 +251,7 @@ LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, if (Q.checkInterference()) { if (Interference) return 0; - Q.collectInterferingVRegs(1); - if (!Q.seenAllInterferences()) + if (Q.collectInterferingVRegs(2) > 1) return 0; Interference = Q.interferingVRegs().front(); } @@ -276,21 +296,14 @@ bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, return false; } -/// tryReassignOrEvict - Try to reassign a single interferences to a different -/// physreg, or evict a single interference with a lower spill weight. +/// tryReassign - Try to reassign a single interference to a different physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. -unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl &NewVRegs){ +unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs){ NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); - // Keep track of the lightest single interference seen so far. - float BestWeight = VirtReg.weight; - LiveInterval *BestVirt = 0; - unsigned BestPhys = 0; - Order.rewind(); while (unsigned PhysReg = Order.next()) { LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); @@ -300,25 +313,92 @@ unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg, continue; if (reassignVReg(*InterferingVReg, PhysReg)) return PhysReg; + } + return 0; +} + + +//===----------------------------------------------------------------------===// +// Interference eviction +//===----------------------------------------------------------------------===// + +/// canEvict - Return true if all interferences between VirtReg and PhysReg can +/// be evicted. Set maxWeight to the maximal spill weight of an interference. +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, + unsigned Size, float &MaxWeight) { + float Weight = 0; + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + // If there is 10 or more interferences, chances are one is smaller. + if (Q.collectInterferingVRegs(10) >= 10) + return false; - // Cannot reassign, is this an eviction candidate? - if (InterferingVReg->weight < BestWeight) { - BestVirt = InterferingVReg; - BestPhys = PhysReg; - BestWeight = InterferingVReg->weight; + // CHeck if any interfering live range is shorter than VirtReg. + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *Intf = Q.interferingVRegs()[i]; + if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) + return false; + if (Intf->getSize() <= Size) + return false; + Weight = std::max(Weight, Intf->weight); } } + MaxWeight = Weight; + return true; +} + +/// tryEvict - Try to evict all interferences for a physreg. +/// @param VirtReg Currently unassigned virtual register. +/// @param Order Physregs to try. +/// @return Physreg to assign VirtReg, or 0. +unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs){ + NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); + + // We can only evict interference if all interfering registers are virtual and + // longer than VirtReg. + const unsigned Size = VirtReg.getSize(); + + // Keep track of the lightest single interference seen so far. + float BestWeight = 0; + unsigned BestPhys = 0; - // Nothing reassigned, can we evict a lighter single interference? - if (BestVirt) { - DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n'); - unassign(*BestVirt, VRM->getPhys(BestVirt->reg)); - ++NumEvicted; - NewVRegs.push_back(BestVirt); - return BestPhys; + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + float Weight = 0; + if (!canEvictInterference(VirtReg, PhysReg, Size, Weight)) + continue; + + // This is an eviction candidate. + DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + << Weight << '\n'); + if (BestPhys && Weight >= BestWeight) + continue; + + // Best so far. + BestPhys = PhysReg; + BestWeight = Weight; + // Stop if the hint can be used. + if (Order.isHint(PhysReg)) + break; } - return 0; + if (!BestPhys) + return 0; + + DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n"); + for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + assert(Q.seenAllInterferences() && "Didn't check all interfererences."); + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *Intf = Q.interferingVRegs()[i]; + unassign(*Intf, VRM->getPhys(Intf->reg)); + ++NumEvicted; + NewVRegs.push_back(Intf); + } + } + return BestPhys; } @@ -426,8 +506,13 @@ float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { if (!IntI.valid()) break; // Not live in, but before the first use. - if (IntI.start() < BI.FirstUse) + if (IntI.start() < BI.FirstUse) { BC.Entry = SpillPlacement::PrefSpill; + // If the block contains a kill from an earlier split, never split + // again in the same block. + if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill)) + BC.Entry = SpillPlacement::MustSpill; + } } // Does interference overlap the uses in the entry segment @@ -458,8 +543,12 @@ float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { IntI.advanceTo(BI.LastUse); if (!IntI.valid()) break; - if (IntI.start() < Stop) + if (IntI.start() < Stop) { BC.Exit = SpillPlacement::PrefSpill; + // Avoid splitting twice in the same block. + if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def)) + BC.Exit = SpillPlacement::MustSpill; + } } } } @@ -1221,12 +1310,22 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, return PhysReg; } - // Try to reassign interferences. - if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs)) + return PhysReg; + + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) return PhysReg; assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + // The first time we see a live range, don't try to split or spill. + // Wait until the second time, when all smaller ranges have been allocated. + // This gives a better picture of the interference to split around. + if (Generation[VirtReg.reg] == 1) { + NewVRegs.push_back(&VirtReg); + return 0; + } + // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 90356021f602..9cc70a30927d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -279,8 +279,8 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - EVT getShiftAmountTy() { - return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + EVT getShiftAmountTy(EVT LHSTy) { + return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -670,7 +670,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; @@ -894,7 +894,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, @@ -1521,7 +1521,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { + SelectionDAG &DAG, bool LegalOperations) { if (!VT.isVector()) { return DAG.getConstant(0, VT); } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { @@ -1647,7 +1647,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1C && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); @@ -1656,7 +1656,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, DAG.getConstant(0, VT), DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getConstant(Log2Val, getShiftAmountTy()))); + DAG.getConstant(Log2Val, + getShiftAmountTy(N0.getValueType())))); } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N1C && N0.getOpcode() == ISD::SHL && @@ -1753,18 +1754,18 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, DAG.getConstant(VT.getSizeInBits() - lg2, - getShiftAmountTy())); + getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy())); + DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. @@ -1814,7 +1815,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1C && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { @@ -1955,7 +1956,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); @@ -1971,11 +1972,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } - + return SDValue(); } @@ -2007,11 +2008,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } - + return SDValue(); } @@ -2090,14 +2091,14 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } - + return SDValue(); } @@ -2107,7 +2108,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); DebugLoc DL = N->getDebugLoc(); - + // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -2120,14 +2121,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } - + return SDValue(); } @@ -3004,7 +3005,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOpcode() == ISD::SIGN_EXTEND) && N0.getOperand(0).getOpcode() == ISD::SHL && isa(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = + uint64_t c1 = cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); @@ -3133,7 +3134,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy()); + SDValue Amt = DAG.getConstant(ShiftAmt, + getShiftAmountTy(N0.getOperand(0).getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, N0.getOperand(0), Amt); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, @@ -3180,7 +3182,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { LargeShiftAmt->getZExtValue()) { SDValue Amt = DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), - getShiftAmountTy()); + getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, N0.getOperand(0).getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); @@ -3245,7 +3247,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL && isa(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = + uint64_t c1 = cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); @@ -3256,7 +3258,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, VT); return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, - DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, N0.getOperand(0)->getOperand(0), DAG.getConstant(c1 + c2, ShiftCountVT))); } @@ -3320,7 +3322,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ShAmt) { Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy())); + DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); AddToWorkList(Op.getNode()); } @@ -3685,7 +3687,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and sign extend + // in one instruction. We only perform this transformation on scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -3887,7 +3891,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (load x)) -> (zext (truncate (zextload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and vector_zext + // in one instruction. We only perform this transformation on scalar zext. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4021,11 +4027,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } DebugLoc DL = N->getDebugLoc(); - - // Ensure that the shift amount is wide enough for the shifted value. + + // Ensure that the shift amount is wide enough for the shifted value. if (VT.getSizeInBits() >= 256) ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); - + return DAG.getNode(N0.getOpcode(), DL, VT, DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), ShAmt); @@ -4094,7 +4100,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (load x)) -> (aext (truncate (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and any_ext + // in one instruction. We only perform this transformation on scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4272,12 +4280,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); - + // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!ExtVT.isRound()) return SDValue(); - + unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { @@ -4292,7 +4300,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // At this point, we must have a load or else we can't do the transform. if (!isa(N0)) return SDValue(); - + // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). @@ -4313,18 +4321,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { N0 = N0.getOperand(0); } } - + // If we haven't found a load, we can't narrow it. Don't transform one with // multiple uses, this would require adding a new load. if (!isa(N0) || !N0.hasOneUse() || // Don't change the width of a volatile load. cast(N0)->isVolatile()) return SDValue(); - + // Verify that we are actually reducing a load width here. if (cast(N0)->getMemoryVT().getSizeInBits() < EVTBits) return SDValue(); - + LoadSDNode *LN0 = cast(N0); EVT PtrType = N0.getOperand(1).getValueType(); @@ -4362,7 +4370,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Shift the result left, if we've swallowed a left shift. SDValue Result = Load; if (ShLeftAmt != 0) { - EVT ShImmTy = getShiftAmountTy(); + EVT ShImmTy = getShiftAmountTy(Result.getValueType()); if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) ShImmTy = VT; Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, @@ -4504,14 +4512,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // See if we can simplify the input to this truncate through knowledge that - // only the low bits are being used. For example "trunc (or (shl x, 8), y)" - // -> trunc y - SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits())); - if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); - + // only the low bits are being used. + // For example "trunc (or (shl x, 8), y)" // -> trunc y + // Currenly we only perform this optimization on scalars because vectors + // may have different active low bits. + if (!VT.isVector()) { + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { @@ -5975,7 +5986,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, DC->getShiftAmountTy())); + DAG.getConstant(ByteShift*8, + DC->getShiftAmountTy(IVal.getValueType()))); // Figure out the offset for the store and the alignment of the access. unsigned StOffset; @@ -6390,7 +6402,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT VT = InVec.getValueType(); - // If we can't generate a legal BUILD_VECTOR, exit + // If we can't generate a legal BUILD_VECTOR, exit if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return SDValue(); @@ -7098,7 +7110,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy()); + SDValue ShCt = DAG.getConstant(ShCtV, + getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, ShCt); AddToWorkList(Shift.getNode()); @@ -7114,7 +7127,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); AddToWorkList(Shift.getNode()); if (XType.bitsGT(AType)) { @@ -7142,13 +7155,15 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy()); + DAG.getConstant(AndMask.countLeadingZeros(), + getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy()); + DAG.getConstant(AndMask.getBitWidth()-1, + getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); @@ -7192,7 +7207,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // shl setcc result by log2 n2c return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy())); + getShiftAmountTy(Temp.getValueType()))); } // Check to see if this is the equivalent of setcc @@ -7215,7 +7230,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), - getShiftAmountTy())); + getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { @@ -7225,13 +7240,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); } } @@ -7258,7 +7273,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, N0, Shift); AddToWorkList(Shift.getNode()); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 98582ba99f14..2ae3286829dd 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -219,6 +219,7 @@ void FunctionLoweringInfo::clear() { CatchInfoFound.clear(); #endif LiveOutRegInfo.clear(); + VisitedBBs.clear(); ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); @@ -254,6 +255,123 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { return FirstReg; } +/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the +/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If +/// the register's LiveOutInfo is for a smaller bit width, it is extended to +/// the larger bit width by zero extension. The bit width must be no smaller +/// than the LiveOutInfo's existing bit width. +const FunctionLoweringInfo::LiveOutInfo * +FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { + if (!LiveOutRegInfo.inBounds(Reg)) + return NULL; + + LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return NULL; + + if (BitWidth > LOI->KnownZero.getBitWidth()) { + LOI->NumSignBits = 1; + LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); + LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + } + + return LOI; +} + +/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination +/// register based on the LiveOutInfo of its operands. +void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { + const Type *Ty = PN->getType(); + if (!Ty->isIntegerTy() || Ty->isVectorTy()) + return; + + SmallVector ValueVTs; + ComputeValueVTs(TLI, Ty, ValueVTs); + assert(ValueVTs.size() == 1 && + "PHIs with non-vector integer types should have a single VT."); + EVT IntVT = ValueVTs[0]; + + if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1) + return; + IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT); + unsigned BitWidth = IntVT.getSizeInBits(); + + unsigned DestReg = ValueMap[PN]; + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + return; + LiveOutRegInfo.grow(DestReg); + LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; + + Value *V = PN->getIncomingValue(0); + if (isa(V) || isa(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast(V)) { + APInt Val = CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = Val.getNumSignBits(); + DestLOI.KnownZero = ~Val; + DestLOI.KnownOne = Val; + } else { + assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" + "CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI = *SrcLOI; + } + + assert(DestLOI.KnownZero.getBitWidth() == BitWidth && + DestLOI.KnownOne.getBitWidth() == BitWidth && + "Masks should have the same bit width as the type."); + + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (isa(V) || isa(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast(V)) { + APInt Val = CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); + DestLOI.KnownZero &= ~Val; + DestLOI.KnownOne &= Val; + continue; + } + + assert(ValueMap.count(V) && "V should have been placed in ValueMap when " + "its CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); + DestLOI.KnownZero &= SrcLOI->KnownZero; + DestLOI.KnownOne &= SrcLOI->KnownOne; + } +} + /// setByValArgumentFrameIndex - Record frame index for the byval /// argument. This overrides previous frame index entry for this argument, /// if any. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 49c862ce3e0b..f08528fe2dc3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -87,7 +87,7 @@ class SelectionDAGLegalize { // If someone requests legalization of the new node, return itself. if (From != To) LegalizedNodes.insert(std::make_pair(To, To)); - + // Transfer SDDbgValues. DAG.TransferDbgValues(From, To); } @@ -498,7 +498,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Val.getValueType())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); @@ -645,7 +646,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Hi.getValueType())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -1264,7 +1266,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1293,7 +1296,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1482,7 +1486,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Tmp3.getValueType()))); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -1492,7 +1497,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Tmp3.getValueType()))); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1727,7 +1733,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift,TLI.getShiftAmountTy())); + DAG.getConstant(BitShift, + TLI.getShiftAmountTy(SignBit.getValueType()))); } } // Now get the sign bit proper, by seeing whether the value is negative. @@ -2207,7 +2214,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (!isSigned) { SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); - SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); + SDValue ShiftConst = + DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); @@ -2226,7 +2234,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // Otherwise, implement the fully general conversion. - EVT SHVT = TLI.getShiftAmountTy(); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); @@ -2241,6 +2248,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), ISD::SETUGE); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, SHVT)); @@ -2387,7 +2395,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT VT = Op.getValueType(); - EVT SHVT = TLI.getShiftAmountTy(); + EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { default: assert(0 && "Unhandled Expand type in BSWAP!"); @@ -2450,7 +2458,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(); + EVT ShVT = TLI.getShiftAmountTy(VT); unsigned Len = VT.getSizeInBits(); assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && @@ -2487,7 +2495,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, ShVT)); - + return Op; } case ISD::CTLZ: { @@ -2501,7 +2509,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // // but see also: http://www.hackersdelight.org/HDcode/nlz.cc EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(); + EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); @@ -2737,7 +2745,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); - EVT ShiftAmountTy = TLI.getShiftAmountTy(); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT); if (VT.isVector()) ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - @@ -2901,7 +2909,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), DAG.getConstant(OpTy.getSizeInBits()/2, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo @@ -3260,7 +3268,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - + SDValue Ret = ExpandLibCall(LC, Node, isSigned); BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, @@ -3268,7 +3276,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); @@ -3286,7 +3295,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, DAG.getConstant(PairTy.getSizeInBits()/2, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(PairTy))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } @@ -3464,7 +3473,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, TLI.getShiftAmountTy())); + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27752123aac4..27a466b3a928 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -177,25 +177,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { // First get the sign bit of second operand. SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), DAG.getConstant(RSize - 1, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(RVT))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); // Shift right or sign-extend it if the two operands have different types. int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, TLI.getShiftAmountTy())); + DAG.getConstant(SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy())); + DAG.getConstant(-SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); } // Clear the sign bit of the first operand. SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), DAG.getConstant(LSize - 1, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(LVT))); Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2fb2f2d8aa1e..9120288921e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1420,7 +1420,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(); + MVT ShTy = TLI.getShiftAmountTy(OpTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -2048,7 +2048,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } break; - + default: // Allow the target to implement this method for its nodes. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2088,12 +2088,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ case ISD::Constant: { const APInt &Val = cast(Op)->getAPIntValue(); - // If negative, return # leading ones. - if (Val.isNegative()) - return Val.countLeadingOnes(); - - // Return # leading zeros. - return Val.countLeadingZeros(); + return Val.getNumSignBits(); } case ISD::SIGN_EXTEND: @@ -2297,12 +2292,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || !isa(Op.getOperand(1))) return false; - - if (Op.getOpcode() == ISD::OR && + + if (Op.getOpcode() == ISD::OR && !MaskedValueIsZero(Op.getOperand(0), cast(Op.getOperand(1))->getAPIntValue())) return false; - + return true; } @@ -2753,7 +2748,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). assert(N2.getValueType().getSizeInBits() >= - Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + Log2_32_Ceil(N1.getValueType().getSizeInBits()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 452f5614b7bf..48d9bbb5132e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -641,16 +641,17 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || - !RegisterVT.isInteger() || RegisterVT.isVector() || - !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) + !RegisterVT.isInteger() || RegisterVT.isVector()) + continue; + + const FunctionLoweringInfo::LiveOutInfo *LOI = + FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); + if (!LOI) continue; - - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[Regs[Part+i]]; unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + unsigned NumSignBits = LOI->NumSignBits; + unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. @@ -908,7 +909,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, Val.getResNo(), Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } - } else + } else DEBUG(dbgs() << "Dropping debug info for " << DI); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } @@ -1417,7 +1418,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TLI.isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1915,7 +1916,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, DEBUG(dbgs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' << "Range: " << Range - << "Size: " << TSize << ". Density: " << Density << "\n\n"); + << ". Size: " << TSize << ". Density: " << Density << "\n\n"); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. @@ -2408,19 +2409,19 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - - MVT ShiftTy = TLI.getShiftAmountTy(); - + + MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); unsigned Op2Size = Op2.getValueType().getSizeInBits(); DebugLoc DL = getCurDebugLoc(); - + // If the operand is smaller than the shift count type, promote it. if (ShiftSize > Op2Size) Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); - + // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index a1a70c394a51..8f466d913bbb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -348,7 +348,7 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; } + unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 62ebc81ef86e..68ba966d268a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include using namespace llvm; @@ -479,16 +480,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); - - // Only install this information if it tells us something. - if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { - FuncInfo->LiveOutRegInfo.grow(DestReg); - FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo->LiveOutRegInfo[DestReg]; - LOI.NumSignBits = NumSignBits; - LOI.KnownOne = KnownOne; - LOI.KnownZero = KnownZero; - } + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -832,11 +824,39 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. - for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - const BasicBlock *LLVMBB = &*I; + ReversePostOrderTraversal RPOT(&Fn); + for (ReversePostOrderTraversal::rpo_iterator + I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + const BasicBlock *LLVMBB = *I; #ifndef NDEBUG CheckLineNumbers(LLVMBB); #endif + + if (OptLevel != CodeGenOpt::None) { + bool AllPredsVisited = true; + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + if (!FuncInfo->VisitedBBs.count(*PI)) { + AllPredsVisited = false; + break; + } + } + + if (AllPredsVisited) { + for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); + I != E && isa(I); ++I) { + FuncInfo->ComputePHILiveOutRegInfo(cast(I)); + } + } else { + for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); + I != E && isa(I); ++I) { + FuncInfo->InvalidatePHILiveOutRegInfo(cast(I)); + } + } + + FuncInfo->VisitedBBs.insert(LLVMBB); + } + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); @@ -851,17 +871,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { PrepareEHLandingPad(); // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) { - for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end(); - DBI != DBE; ++DBI) { - if (const DbgInfoIntrinsic *DI = dyn_cast(DBI)) { - const DebugLoc DL = DI->getDebugLoc(); - SDB->setCurDebugLoc(DL); - break; - } - } + if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - } // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 691390e2a0e4..35b847ccabfb 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -563,7 +563,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setOperationAction(ISD::TRAP, MVT::Other, Expand); IsLittleEndian = TD->isLittleEndian(); - ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; @@ -596,6 +596,10 @@ TargetLowering::~TargetLowering() { delete &TLOF; } +MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize()); +} + /// canOpTrap - Returns true if the operation can trap for the value type. /// VT must be a legal type. bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { @@ -1401,7 +1405,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, BitWidth - InnerVT.getSizeInBits()) & DemandedMask) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { - EVT ShTy = getShiftAmountTy(); + EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) ShTy = InnerVT; SDValue NarrowShl = @@ -2188,7 +2192,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (ConstantSDNode *AndRHS = dyn_cast(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(); + getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -2359,7 +2363,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, getShiftAmountTy())); + DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -2381,7 +2385,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, getShiftAmountTy())); + DAG.getConstant(1, getShiftAmountTy(N0.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2493,7 +2497,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, } } } - + return false; } @@ -3141,14 +3145,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy())); + DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -3192,19 +3196,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, assert(magics.s < N1C->getAPIntValue().getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy())); + DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); if (Created) Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, getShiftAmountTy())); + DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); if (Created) Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); if (Created) Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s-1, getShiftAmountTy())); + DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 5663936bf3aa..fd5d50b7ecb8 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -167,6 +167,20 @@ void SplitAnalysis::calcLiveBlockInfo() { } } +bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { + unsigned OrigReg = VRM.getOriginal(CurLI->reg); + const LiveInterval &Orig = LIS.getInterval(OrigReg); + assert(!Orig.empty() && "Splitting empty interval?"); + LiveInterval::const_iterator I = Orig.find(Idx); + + // Range containing Idx should begin at Idx. + if (I != Orig.end() && I->start <= Idx) + return I->start == Idx; + + // Range does not contain Idx, previous must end at Idx. + return I != Orig.begin() && (--I)->end == Idx; +} + void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { unsigned count = UsingBlocks.lookup(*I); @@ -947,10 +961,10 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { openIntv(); SlotIndex SegStart = enterIntvBefore(BI.FirstUse); - if (BI.LastUse < BI.LastSplitPoint) { + if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) { useIntv(SegStart, leaveIntvAfter(BI.LastUse)); } else { - // THe last use os after tha last valid split point. + // The last use is after the last valid split point. SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint); useIntv(SegStart, SegStop); overlapIntv(SegStop, BI.LastUse); diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 5c34afd1c819..e02e6297035d 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -125,6 +125,13 @@ public: return UsingBlocks.lookup(MBB); } + /// isOriginalEndpoint - Return true if the original live range was killed or + /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, + /// and 'use' for an early-clobber def. + /// This can be used to recognize code inserted by earlier live range + /// splitting. + bool isOriginalEndpoint(SlotIndex Idx) const; + typedef SmallPtrSet BlockPtrSet; // Print a set of blocks with use counts. diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 0b7bd98cc692..fa311dc5d66c 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -178,6 +178,10 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { + // FIXME: Why is this here? Codegen is should not be in the business + // of figuring section flags. If the user wrote section(".eh_frame"), + // we should just pass that to MC which will defer to the assembly + // or use its default if producing an object file. if (Name.empty() || Name[0] != '.') return K; // Some lame default implementation based on some magic section names. @@ -203,6 +207,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { Name.startswith(".llvm.linkonce.tb.")) return SectionKind::getThreadBSS(); + if (Name == ".eh_frame") + return SectionKind::getDataRel(); + return K; } @@ -441,11 +448,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); if (T.getOS() == Triple::Darwin) { - unsigned MajNum = T.getDarwinMajorNumber(); - if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger + switch (T.getDarwinMajorNumber()) { + case 7: // 10.3 Panther. + case 8: // 10.4 Tiger. CommDirectiveSupportsAlignment = false; - if (MajNum > 9) // 10.6 SnowLeopard - IsFunctionEHSymbolGlobal = false; + break; + case 9: // 10.5 Leopard. + case 10: // 10.6 SnowLeopard. + break; + } } TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -630,7 +641,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; - unsigned TAA, StubSize; + unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, StubSize); @@ -643,10 +654,19 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } + bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES); + if (!TAAWasSet) + TAA = 0; // Sensible default if this is a new section. + // Get the section. const MCSectionMachO *S = getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + // If TAA wasn't set by ParseSectionSpecifier() above, + // use the value returned by getMachOSection() as a default. + if (!TAAWasSet) + TAA = S->getTypeAndAttributes(); + // Okay, now that we got the section, verify that the TAA & StubSize agree. // If the user declared multiple globals with different section flags, we need // to reject it here. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 458a2134bf4a..ec149dddc1d9 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -478,7 +478,8 @@ static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, if (!RegKills[KReg]) return; - assert(KillOps[KReg] == KillOp && "invalid superreg kill flags"); + assert(KillOps[KReg]->getParent() == KillOp->getParent() && + "invalid superreg kill flags"); KillOps[KReg] = NULL; RegKills.reset(KReg); @@ -487,7 +488,8 @@ static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - assert(KillOps[*SR] == KillOp && "invalid subreg kill flags"); + assert(KillOps[*SR]->getParent() == KillOp->getParent() && + "invalid subreg kill flags"); KillOps[*SR] = NULL; RegKills.reset(*SR); } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 8a00a16cfb4a..ea1629d30565 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -833,7 +833,11 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, return true; const MCSymbol &A = Symbol.AliasedSymbol(); - if (!A.isVariable() && A.isUndefined() && !Data.isCommon()) + if (Symbol.isVariable() && !A.isVariable() && A.isUndefined()) + return false; + + bool IsGlobal = GetBinding(Data) == ELF::STB_GLOBAL; + if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) return false; if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined()) @@ -1732,6 +1736,10 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, assert(Modifier == MCSymbolRefExpr::VK_None); Type = ELF::R_X86_64_PC16; break; + case FK_PCRel_1: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC8; + break; } } else { switch ((unsigned)Fixup.getKind()) { diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index cc1afbd08926..8199fb2e158a 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -65,6 +65,7 @@ MCAsmInfo::MCAsmInfo() { WeakDefDirective = 0; LinkOnceDirective = 0; HiddenVisibilityAttr = MCSA_Hidden; + HiddenDeclarationVisibilityAttr = MCSA_Hidden; ProtectedVisibilityAttr = MCSA_Protected; HasLEB128 = false; SupportsDebugInformation = false; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 13776f04437d..526ad0da42aa 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -45,6 +45,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { HasAggressiveSymbolFolding = false; HiddenVisibilityAttr = MCSA_PrivateExtern; + HiddenDeclarationVisibilityAttr = MCSA_Invalid; // Doesn't support protected visibility. ProtectedVisibilityAttr = MCSA_Global; diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp index cfeb56fa3dfd..2b0c73e80593 100644 --- a/lib/MC/MCDisassembler/EDOperand.cpp +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -152,10 +152,23 @@ int EDOperand::evaluate(uint64_t &result, uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); - //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); - + uint64_t addr = 0; + unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); + + if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) { + unsigned fsID = Disassembler.registerIDWithName("FS"); + unsigned gsID = Disassembler.registerIDWithName("GS"); + + if (segmentReg == fsID || + segmentReg == gsID) { + uint64_t segmentBase; + if (!callback(&segmentBase, segmentReg, arg)) + addr += segmentBase; + } + } + if (baseReg) { uint64_t baseVal; if (callback(&baseVal, baseReg, arg)) @@ -175,7 +188,7 @@ int EDOperand::evaluate(uint64_t &result, result = addr; return 0; } - } + } // switch (operandType) break; case Triple::arm: case Triple::thumb: @@ -203,6 +216,7 @@ int EDOperand::evaluate(uint64_t &result, return 0; } } + break; } return -1; diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp index 400e1649e970..de770b41ef35 100644 --- a/lib/MC/MCDisassembler/EDToken.cpp +++ b/lib/MC/MCDisassembler/EDToken.cpp @@ -194,6 +194,10 @@ int EDToken::tokenize(std::vector &tokens, tokens.push_back(token); } + // Free any parsed operands. + for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i) + delete parsedOperands[i]; + return 0; } diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 035826690cdf..e67d9b03a95a 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -242,7 +242,23 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + int64_t Res; + if (Offset->EvaluateAsAbsolute(Res, getAssembler())) { + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + return; + } + + MCSymbol *CurrentPos = getContext().CreateTempSymbol(); + EmitLabel(CurrentPos); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Ref = + MCSymbolRefExpr::Create(CurrentPos, Variant, getContext()); + const MCExpr *Delta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext()); + + if (!Delta->EvaluateAsAbsolute(Res, getAssembler())) + report_fatal_error("expected assembly-time absolute expression"); + EmitFill(Res, Value, 0); } void MCObjectStreamer::Finish() { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index c6d0da609b3b..a84917ffb86a 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -603,6 +603,8 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { Lex(); // Eat the '('. return ParseParenExpr(Res, EndLoc); case AsmToken::LBrac: + if (!PlatformParser->HasBracketExpressions()) + return TokError("brackets expression not supported on this target"); Lex(); // Eat the '['. return ParseBracketExpr(Res, EndLoc); case AsmToken::Minus: diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index bfaf36a451b3..dcf689a6f0e7 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -30,9 +30,12 @@ class ELFAsmParser : public MCAsmParserExtension { bool ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind); + bool SeenIdent; public: - ELFAsmParser() {} + ELFAsmParser() : SeenIdent(false) { + BracketExpressionsSupported = true; + } virtual void Initialize(MCAsmParser &Parser) { // Call the base implementation. @@ -456,13 +459,12 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { SectionKind::getReadOnly(), 1, ""); - static bool First = true; - getStreamer().PushSection(); getStreamer().SwitchSection(Comment); - if (First) + if (!SeenIdent) { getStreamer().EmitIntValue(0, 1); - First = false; + SeenIdent = true; + } getStreamer().EmitBytes(Data, 0); getStreamer().EmitIntValue(0, 1); getStreamer().PopSection(); diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp index c30d3067da59..3f25a14926b6 100644 --- a/lib/MC/MCParser/MCAsmParserExtension.cpp +++ b/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -10,7 +10,8 @@ #include "llvm/MC/MCParser/MCAsmParserExtension.h" using namespace llvm; -MCAsmParserExtension::MCAsmParserExtension() { +MCAsmParserExtension::MCAsmParserExtension() : + BracketExpressionsSupported(false) { } MCAsmParserExtension::~MCAsmParserExtension() { diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index b897c0bd6855..577e93aed6bc 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -101,16 +101,18 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, return; } - OS << ','; - unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE; assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE && "Invalid SectionType specified!"); - if (SectionTypeDescriptors[SectionType].AssemblerName) + if (SectionTypeDescriptors[SectionType].AssemblerName) { + OS << ','; OS << SectionTypeDescriptors[SectionType].AssemblerName; - else - OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>"; + } else { + // If we have no name for the attribute, stop here. + OS << '\n'; + return; + } // If we don't have any attributes, we're done. unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES; @@ -125,7 +127,9 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, // Check each attribute to see if we have it. char Separator = ','; - for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) { + for (unsigned i = 0; + SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag; + ++i) { // Check to see if we have this attribute. if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0) continue; @@ -207,7 +211,6 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. "between 1 and 16 characters"; // If there is no comma after the section, we're done. - TAA = 0; StubSize = 0; if (Comma.second.empty()) return ""; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 3dcdba13135f..4b302c8602c9 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -20,8 +20,8 @@ using namespace llvm; MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) { - PrevSectionStack.push_back(NULL); - CurSectionStack.push_back(NULL); + const MCSection *section = NULL; + SectionStack.push_back(std::make_pair(section, section)); } MCStreamer::~MCStreamer() { diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 77033428b577..08f36d2af3a1 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1505,7 +1505,7 @@ APInt::ms APInt::magic() const { r2 = r2 - ad; } delta = ad - r2; - } while (q1.ule(delta) || (q1 == delta && r1 == 0)); + } while (q1.ult(delta) || (q1 == delta && r1 == 0)); mag.m = q2 + 1; if (d.isNegative()) mag.m = -mag.m; // resulting magic number diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1fb88726d0de..7e2183d7cd5e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -155,10 +155,11 @@ namespace ARMII { //===------------------------------------------------------------------===// // Code domain. DomainShift = 18, - DomainMask = 3 << DomainShift, + DomainMask = 7 << DomainShift, DomainGeneral = 0 << DomainShift, DomainVFP = 1 << DomainShift, DomainNEON = 2 << DomainShift, + DomainNEONA8 = 4 << DomainShift, //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9f295302db0e..26f48b308316 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -172,6 +172,7 @@ class ARMFastISel : public FastISel { unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); + unsigned ARMSelectCallOp(const GlobalValue *GV); // Call handling routines. private: @@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return true; } +unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { + + // Depend our opcode for thumb on whether or not we're targeting an + // externally callable function. For libcalls we'll just pass a NULL GV + // in here. + bool isExternal = false; + if (!GV || GV->hasExternalLinkage()) isExternal = true; + + // Darwin needs the r9 versions of the opcodes. + bool isDarwin = Subtarget->isTargetDarwin(); + if (isThumb && isExternal) { + return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi; + } else if (isThumb) { + return isDarwin ? ARM::tBLr9 : ARM::tBL; + } else { + return isDarwin ? ARM::BLr9 : ARM::BL; + } +} + // A quick function that will emit a call for a named libcall in F with the // vector of passed arguments for the Instruction in I. We can assume that we // can emit a call for any libcall we can produce. This is an abridged version @@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; - unsigned CallOpc; - if(isThumb) { - CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; + unsigned CallOpc = ARMSelectCallOp(NULL); + if(isThumb) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) .addExternalSymbol(TLI.getLibcallName(Call)); - } else { - CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; + else // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addExternalSymbol(TLI.getLibcallName(Call))); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) @@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) { // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; - unsigned CallOpc; + unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. - if(isThumb) { - CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; + if(isThumb) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) .addGlobalAddress(GV, 0, 0); - } else { - CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; + else // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addGlobalAddress(GV, 0, 0)); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index f42c6db84fd3..68c33f098ec9 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 3. - if (DPRCSSize > 0) MBBI++; + if (DPRCSSize > 0) { + MBBI++; + // Since vpush register list cannot have gaps, there may be multiple vpush + // instructions in the prologue. + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + MBBI++; + } NumBytes = DPRCSOffset; if (NumBytes) { @@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. - if (AFI->getDPRCalleeSavedAreaSize()) MBBI++; + if (AFI->getDPRCalleeSavedAreaSize()) { + MBBI++; + // Since vpop register list cannot have gaps, there may be multiple vpop + // instructions in the epilogue. + while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) + MBBI++; + } if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 676b01e91c53..e97ce50bc429 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. const TargetInstrDesc &TID = MI->getDesc(); unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (Domain == ARMII::DomainVFP) { - unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD || - Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) - return false; - } else if (Domain == ARMII::DomainNEON) { - if (MI->getDesc().mayStore() || MI->getDesc().mayLoad()) - return false; - } else + if (TID.mayStore()) return false; - return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + return false; } ScheduleHazardRecognizer::HazardType diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a506cffdba34..f0d5a7d7c2e7 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -126,6 +126,7 @@ public: bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); + bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); @@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, return true; } +bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, + SDValue &Offset) { + LSBaseSDNode *LdSt = cast(Op); + ISD::MemIndexedMode AM = LdSt->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + Offset = N; + if (ConstantSDNode *NC = dyn_cast(N)) { + if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) + Offset = CurDAG->getRegister(0, MVT::i32); + } + return true; +} + bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 1835ec0f0054..ab9f9e1571e3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, RC = ARM::GPRRegisterClass; // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; @@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, MachinePointerInfo::getFixedStack(FI), false, false, 0); } else { - Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl); + Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } @@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); } @@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, else RC = ARM::GPRRegisterClass; - unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl); + unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); - bool F2IisFast = Subtarget->isCortexA9() || - Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; + bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || + Tmp0.getOpcode() == ARMISD::VMOVDRR; + bool UseNEON = !InGPR && Subtarget->hasNEON(); + + if (UseNEON) { + // Use VBSL to copy the sign bit. + unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); + SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, + DAG.getTargetConstant(EncodedVal, MVT::i32)); + EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; + if (VT == MVT::f64) + Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, + DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), + DAG.getConstant(32, MVT::i32)); + else /*if (VT == MVT::f32)*/ + Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); + if (SrcVT == MVT::f32) { + Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); + if (VT == MVT::f64) + Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, + DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), + DAG.getConstant(32, MVT::i32)); + } + Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); + + SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), + MVT::i32); + AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); + SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, + DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); + + SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, + DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), + DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); + if (SrcVT == MVT::f32) { + Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, + DAG.getConstant(0, MVT::i32)); + } else { + Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); + } + + return Res; + } // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) @@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { &Tmp1, 1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); - // If float to int conversion isn't going to be super expensive, then simply - // or in the signbit. - if (F2IisFast) { - SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); - SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); - Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); - if (VT == MVT::f32) { - Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); - } - - // f64: Or the high part with signbit and then combine two parts. - Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); - SDValue Lo = Tmp0.getValue(0); - SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); - Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); - return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + // Or in the signbit with integer operations. + SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); + SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); + Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); + if (VT == MVT::f32) { + Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); } - // Remove the signbit of operand 0. - Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - - // If operand 1 signbit is one, then negate operand 0. - SDValue ARMcc; - SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32), - ISD::SETLT, ARMcc, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp); + // f64: Or the high part with signbit and then combine two parts. + Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp0, 1); + SDValue Lo = Tmp0.getValue(0); + SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); + Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ @@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl); + unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 765cba42d0bd..359ac45cee1d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>; def IndexModeUpd : IndexMode<3>; // Instruction execution domain. -class Domain val> { - bits<2> Value = val; +class Domain val> { + bits<3> Value = val; } def GenericDomain : Domain<0>; def VFPDomain : Domain<1>; // Instructions in VFP domain only def NeonDomain : Domain<2>; // Instructions in Neon domain only def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains +def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8 //===----------------------------------------------------------------------===// // ARM special operands. @@ -249,7 +250,7 @@ class InstTemplate, let EncoderMethod = "getAddrMode6AddressOpValue"; } -def am6offset : Operand { +def am6offset : Operand, + ComplexPattern { let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1e2e5504e662..dc3d63e26ef5 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { - // ...with address register writeback: -class VST1LNWB op11_8, bits<4> op7_4, string Dt> +class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>; + "$Rn.addr = $wb", + [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), + addrmode6:$Rn, am6offset:$Rm))]>; +class VST1QLNWBPseudo + : VSTQLNWBPseudo { + let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), + addrmode6:$addr, am6offset:$offset))]; +} -def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> { +def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, + NEONvgetlaneu> { let Inst{7-5} = lane{2-0}; } -def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> { +def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, + NEONvgetlaneu> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> { +def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, + extractelt> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } -def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo; -def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo; -def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo; +def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo; + +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 920c5c98002a..29902833f2bb 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -197,9 +197,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSUBD : ADbI<0b11100, 0b11, 1, 0, @@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VDIVD : ADbI<0b11101, 0b00, 0, 0, @@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VNMULD : ADbI<0b11100, 0b10, 1, 0, @@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // Match reassociated forms only if not sign dependent rounding. @@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fabs SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } let Defs = [FPSCR] in { @@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fneg SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, @@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { let Inst{7} = 1; // s32 - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, @@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { let Inst{7} = 0; // u32 - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FP -> Int: @@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { let Inst{7} = 1; // Z bit - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, @@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { let Inst{7} = 1; // Z bit - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. @@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, @@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, @@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), @@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), @@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), @@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), @@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd"> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 0bd740cfb28c..1465984899c6 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, // Materializable GVs (in JIT lazy compilation mode) do not require an extra // load from stub. - bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + bool isDecl = GV->hasAvailableExternallyLinkage(); + if (GV->isDeclaration() && !GV->isMaterializable()) + isDecl = true; if (!isTargetDarwin()) { // Extra load is needed for all externally visible. diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f9e86eb36e04..9a27e2f47064 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { } bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); // FIXME: Detect integer instructions properly. + const TargetInstrDesc &TID = MI->getDesc(); unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (Domain == ARMII::DomainVFP) { - unsigned Opcode = TID.getOpcode(); - if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD || - Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) - return false; - } else if (Domain == ARMII::DomainNEON) { - if (TID.mayStore() || TID.mayLoad()) - return false; - } else { + if (TID.mayStore()) return false; - } - - return MI->readsRegister(Reg, TRI); + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(Reg, TRI); return false; } diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 97e54bfaed9e..965665c2821a 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -35,6 +35,7 @@ namespace { private: const TargetRegisterInfo *TRI; const ARMBaseInstrInfo *TII; + bool isA8; typedef DenseMap RegMap; @@ -43,6 +44,11 @@ namespace { char NEONMoveFixPass::ID = 0; } +static bool inNEONDomain(unsigned Domain, bool isA8) { + return (Domain & ARMII::DomainNEON) || + (isA8 && (Domain & ARMII::DomainNEONA8)); +} + bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { RegMap Defs; bool Modified = false; @@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { Domain = ARMII::DomainNEON; } - if (Domain & ARMII::DomainNEON) { + if (inNEONDomain(Domain, isA8)) { // Convert VMOVD to VMOVDneon unsigned DestReg = MI->getOperand(0).getReg(); @@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { TRI = TM.getRegisterInfo(); TII = static_cast(TM.getInstrInfo()); + isA8 = TM.getSubtarget().isCortexA8(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2f67257f8fa1..9b1073be3c8e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, bool Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + while (MBBI->isDebugValue()) { + ++MBBI; + if (MBBI == MBB.end()) + return false; + } + unsigned PredReg = 0; return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 9137d654edba..c4f43ab9e4e7 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -48,7 +48,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the TargetLowering object. //I am having problems with shr n i8 1 - setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index b429e9fc1390..cb98f921dd68 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -31,25 +31,25 @@ namespace llvm { /// GPRelHi/GPRelLo - These represent the high and low 16-bit /// parts of a global address respectively. - GPRelHi, GPRelLo, + GPRelHi, GPRelLo, /// RetLit - Literal Relocation of a Global RelLit, /// GlobalRetAddr - used to restore the return address GlobalRetAddr, - + /// CALL - Normal call. CALL, /// DIVCALL - used for special library calls for div and rem DivCall, - + /// return flag operand RET_FLAG, /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. + /// corresponds to the COND_BRANCH pseudo instruction. /// *PRC is the input register to compare to zero, /// OPC is the branch opcode to use (e.g. Alpha::BEQ), /// DESTBB is the destination block to branch to, and INFLAG is @@ -62,7 +62,9 @@ namespace llvm { class AlphaTargetLowering : public TargetLowering { public: explicit AlphaTargetLowering(TargetMachine &TM); - + + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } + /// getSetCCResultType - Get the SETCC result ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; @@ -92,7 +94,7 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector + std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index dd27d0a0ff36..7c80eec3ba63 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -41,7 +41,6 @@ using namespace llvm; BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { - setShiftAmountType(MVT::i16); setBooleanContents(ZeroOrOneBooleanContent); setStackPointerRegisterToSaveRestore(BF::SP); setIntDivIsCheap(false); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 15a745fa8724..102c830688e2 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -32,6 +32,7 @@ namespace llvm { class BlackfinTargetLowering : public TargetLowering { public: BlackfinTargetLowering(TargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; } virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e6511d008c2b..743a4d7a0f78 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -435,7 +435,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); - setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrNegativeOneBooleanContent); setStackPointerRegisterToSaveRestore(SPU::R1); @@ -1219,7 +1218,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); - unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), false, false, 0); @@ -2190,7 +2189,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, { SDValue N0 = Op.getOperand(0); // Everything has at least one operand DebugLoc dl = Op.getDebugLoc(); - EVT ShiftVT = TLI.getShiftAmountTy(); + EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); assert(Op.getValueType() == MVT::i8); switch (Opc) { @@ -3112,7 +3111,7 @@ SPUTargetLowering::getSingleConstraintMatchWeight( switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; + break; //FIXME: Seems like the supported constraint letters were just copied // from PPC, as the following doesn't correspond to the GCC docs. // I'm leaving it so until someone adds the corresponding lowering support. diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 95d44afe37c8..dd48d7bafaef 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -109,6 +109,8 @@ namespace llvm { /// getSetCCResultType - Return the ValueType for ISD::SETCC virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + //! Custom lowering hooks virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -179,9 +181,9 @@ namespace llvm { virtual bool isLegalICmpImmediate(int64_t Imm) const; - virtual bool isLegalAddressingMode(const AddrMode &AM, + virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; - + /// After allocating this many registers, the allocator should feel /// register pressure. The value is a somewhat random guess, based on the /// number of non callee saved registers in the C calling convention. diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 2f40bfc89601..f39826b1cf17 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -907,7 +907,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl); + unsigned Reg = MF.addLiveIn(ArgRegEnd, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -973,7 +973,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, for (; Start <= End; ++Start, ++StackLoc) { unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); - unsigned LiveReg = MF.addLiveIn(Reg, RC, dl); + unsigned LiveReg = MF.addLiveIn(Reg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); int FI = MFI->CreateFixedObject(4, 0, true); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 30ef4f5da08e..a95d59c0576c 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -77,10 +77,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : // Division is expensive setIntDivIsCheap(false); - // Even if we have only 1 bit shift here, we can perform - // shifts of the whole bitwidth 1 bit per step. - setShiftAmountType(MVT::i8); - setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::Latency); @@ -330,7 +326,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, // Arguments passed in registers EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { - default: + default: { #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 673c5433b96e..19c9eac589f0 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -73,6 +73,8 @@ namespace llvm { public: explicit MSP430TargetLowering(MSP430TargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8f623b859b55..70d00e4b5cc5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -362,7 +362,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrOneBooleanContent); if (TM.getSubtarget().isPPC64()) { @@ -1597,7 +1596,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( } // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); InVals.push_back(ArgValue); @@ -1689,7 +1688,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) - VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl); + VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1708,7 +1707,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) - VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl); + VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1872,7 +1871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(), @@ -1891,7 +1890,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // to memory. ArgVal will be address of the beginning of // the object. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); @@ -1914,7 +1913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); ++GPR_idx; } else { @@ -1928,7 +1927,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { @@ -1966,9 +1965,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg; if (ObjectVT == MVT::f32) - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; @@ -1986,7 +1985,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -2064,9 +2063,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg; if (isPPC64) - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 80cab75b960a..33daae9b5445 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -29,36 +29,36 @@ namespace llvm { /// FSEL - Traditional three-operand fsel node. /// FSEL, - + /// FCFID - The FCFID instruction, taking an f64 operand and producing /// and f64 value containing the FP representation of the integer that /// was temporarily in the f64 operand. FCFID, - - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 + + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, - + /// STFIWX - The STFIWX instruction. The first operand is an input token /// chain, then an f64 value to store, then an address to store it to. STFIWX, - + // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. VMADDFP, VNMSUBFP, - + /// VPERM - The PPC VPERM Instruction. /// VPERM, - + /// Hi/Lo - These represent the high and low 16-bit parts of a global /// address respectively. These nodes have two operands, the first of /// which must be a TargetGlobalAddress, and the second of which must be a /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', /// though these are usually folded into other nodes. Hi, Lo, - + TOC_ENTRY, /// The following three target-specific nodes are used for calls through @@ -80,37 +80,37 @@ namespace llvm { /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to /// compute an allocation on the stack. DYNALLOC, - + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr /// at function entry, used for PIC code. GlobalBaseReg, - + /// These nodes represent the 32-bit PPC shifts that operate on 6-bit /// shift amounts. These nodes are generated by the multi-precision shift /// code. SRL, SRA, SHL, - + /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" /// registers. EXTSW_32, /// CALL - A direct function call. CALL_Darwin, CALL_SVR4, - + /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, - + /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. BCTRL_Darwin, BCTRL_SVR4, - + /// Return with a flag operand, matched by 'blr' RET_FLAG, - + /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF /// instructions. This copies the bits corresponding to the specified /// CRREG into the resultant GPR. Bits corresponding to other CR regs @@ -122,20 +122,20 @@ namespace llvm { /// encoding for the OPC field to identify the compare. For example, 838 /// is VCMPGTSH. VCMP, - + /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*o instructions. For lack of better number, we use the + /// altivec VCMP*o instructions. For lack of better number, we use the /// opcode number encoding for the OPC field to identify the compare. For /// example, 838 is VCMPGTSH. VCMPo, - + /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the /// condition register to branch on, OPC is the branch opcode to use (e.g. /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is /// an optional input flag argument. COND_BRANCH, - + // The following 5 instructions are used only as part of the // long double-to-int conversion sequence. @@ -150,7 +150,7 @@ namespace llvm { MTFSB1, /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with - /// rounding towards zero. It has flags added so it won't move past the + /// rounding towards zero. It has flags added so it won't move past the /// FPSCR-setting instructions. FADDRTZ, @@ -174,14 +174,14 @@ namespace llvm { /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, - - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or /// i32. - STBRX, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a + STBRX, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 /// or i32. @@ -194,7 +194,7 @@ namespace llvm { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); - + /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); @@ -208,16 +208,16 @@ namespace llvm { /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, bool isUnary); - + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, bool isUnary); - + /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - + /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. bool isAllNegativeZeroVector(SDNode *N); @@ -225,24 +225,26 @@ namespace llvm { /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize); - + /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element /// size, return the constant being splatted. The ByteSize field indicates /// the number of bytes of each element [124] -> [bhw]. SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); } - + class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); - + /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; @@ -253,19 +255,19 @@ namespace llvm { SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const; - + /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; - + /// SelectAddressRegImm - Returns true if the address N can be represented /// by a base register plus a signed 16-bit displacement [r+imm], and if it /// is not better represented as reg+reg. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; - + /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, @@ -277,7 +279,7 @@ namespace llvm { bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; - + /// LowerOperation - Provide custom lowering hooks for some operations. /// virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -289,10 +291,10 @@ namespace llvm { SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - + virtual void computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const; @@ -300,13 +302,13 @@ namespace llvm { virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; - MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, - MachineBasicBlock *MBB, + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; - + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -314,7 +316,7 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::pair + std::pair getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; @@ -329,11 +331,11 @@ namespace llvm { char ConstraintLetter, std::vector &Ops, SelectionDAG &DAG) const; - + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; - + /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. @@ -344,7 +346,7 @@ namespace llvm { virtual bool isLegalAddressImmediate(GlobalValue *GV) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - + /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4e14fbbb09ba..f85914b61d9d 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -254,6 +254,20 @@ unsigned long reverse(unsigned v) { //===---------------------------------------------------------------------===// +[LOOP DELETION] + +We don't delete this output free loop, because trip count analysis doesn't +realize that it is finite (if it were infinite, it would be undefined). Not +having this blocks Loop Idiom from matching strlen and friends. + +void foo(char *C) { + int x = 0; + while (*C) + ++x,++C; +} + +//===---------------------------------------------------------------------===// + [LOOP RECOGNITION] These idioms should be recognized as popcount (see PR1488): @@ -287,6 +301,16 @@ unsigned int popcount(unsigned int input) { return count; } +This should be recognized as CLZ: rdar://8459039 + +unsigned clz_a(unsigned a) { + int i; + for (i=0;i<32;i++) + if (a & (1<<(31-i))) + return i; + return 32; +} + This sort of thing should be added to the loop idiom pass. //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index ee292758d186..4b12852ef873 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -79,6 +79,7 @@ namespace { MachineBasicBlock::iterator findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); + bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); }; char Filler::ID = 0; @@ -91,6 +92,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { return new Filler(tm); } + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. /// @@ -112,6 +114,13 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); else MBB.splice(++J, &MBB, D); + unsigned structSize = 0; + if (needsUnimp(I, structSize)) { + MachineBasicBlock::iterator J = I; + ++J; //skip the delay filler. + BuildMI(MBB, ++J, I->getDebugLoc(), + TII->get(SP::UNIMP)).addImm(structSize); + } } return Changed; } @@ -287,6 +296,28 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, { if (candidate == MBB.begin()) return false; + if (candidate->getOpcode() == SP::UNIMP) + return true; const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); return prevdesc.hasDelaySlot(); } + +bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) +{ + if (!I->getDesc().isCall()) + return false; + + unsigned structSizeOpNum = 0; + switch (I->getOpcode()) { + default: llvm_unreachable("Unknown call opcode."); + case SP::CALL: structSizeOpNum = 1; break; + case SP::JMPLrr: + case SP::JMPLri: structSizeOpNum = 2; break; + } + + const MachineOperand &MO = I->getOperand(structSizeOpNum); + if (!MO.isImm()) + return false; + StructSize = MO.getImm(); + return true; +} diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 196b87dd58d0..70574c370f35 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -16,7 +16,9 @@ #include "SparcISelLowering.h" #include "SparcTargetMachine.h" #include "SparcMachineFunctionInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/Module.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -116,6 +118,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); } + + unsigned RetAddrOffset = 8; //Call Inst + Delay Slot // If the function returns a struct, copy the SRetReturnReg to I0 if (MF.getFunction()->hasStructRetAttr()) { SparcMachineFunctionInfo *SFI = MF.getInfo(); @@ -127,11 +131,16 @@ SparcTargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); if (MF.getRegInfo().liveout_empty()) MF.getRegInfo().addLiveOut(SP::I0); + RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } + SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32); + if (Flag.getNode()) - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, + RetAddrOffsetNode, Flag); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, + RetAddrOffsetNode); } /// LowerFormalArguments - V8 uses a very simple ABI, where all values are @@ -194,7 +203,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, false, false, 0); } else { unsigned loReg = MF.addLiveIn(NextVA.getLocReg(), - &SP::IntRegsRegClass, dl); + &SP::IntRegsRegClass); LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32); } SDValue WholeValue = @@ -393,6 +402,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector MemOpChains; const unsigned StackOffset = 92; + bool hasStructRetAttr = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size(); i != e; @@ -433,6 +443,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); + hasStructRetAttr = true; continue; } @@ -546,6 +557,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0; + // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. @@ -559,6 +572,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); + if (hasStructRetAttr) + Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { unsigned Reg = RegsToPass[i].first; if (Reg >= SP::I0 && Reg <= SP::I7) @@ -600,7 +615,29 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, return Chain; } +unsigned +SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const +{ + const Function *CalleeFn = 0; + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + CalleeFn = dyn_cast(G->getGlobal()); + } else if (ExternalSymbolSDNode *E = + dyn_cast(Callee)) { + const Function *Fn = DAG.getMachineFunction().getFunction(); + const Module *M = Fn->getParent(); + CalleeFn = M->getFunction(E->getSymbol()); + } + + if (!CalleeFn) + return 0; + assert(CalleeFn->hasStructRetAttr() && + "Callee does not have the StructRet attribute."); + + const PointerType *Ty = cast(CalleeFn->arg_begin()->getType()); + const Type *ElementTy = Ty->getElementType(); + return getTargetData()->getTypeAllocSize(ElementTy); +} //===----------------------------------------------------------------------===// // TargetLowering Implementation diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 849e4010af6b..7d02df8adcca 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -101,6 +101,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 107232357b3b..cf5c48fd18d9 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -124,7 +124,8 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def retflag : SDNode<"SPISD::RET_FLAG", SDTNone, +def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, @@ -132,7 +133,7 @@ def flushw : SDNode<"SPISD::FLUSHW", SDTNone, def getPCX : Operand { let PrintMethod = "printGetPCX"; -} +} //===----------------------------------------------------------------------===// // SPARC Flag Conditions @@ -232,6 +233,9 @@ let hasSideEffects = 1, mayStore = 1 in { [(flushw)]>; } +def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), + "unimp $val", []>; + // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the // fpmover pass. let Predicates = [HasNoV9] in { // Only emit these in V8 mode. @@ -292,11 +296,13 @@ let usesCustomInserter = 1, Uses = [FCC] in { // Section A.3 - Synthetic Instructions, p. 85 // special cases of JMPL: let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { - let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in - def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; + let rd = O7.Num, rs1 = G0.Num in + def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %o7+$val", [(retflag simm13:$val)]>; - let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in - def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>; + let rd = I7.Num, rs1 = G0.Num in + def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %i7+$val", []>; } // Section B.1 - Load Integer Instructions, p. 90 diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d694f2e67edc..90939c312065 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -59,9 +59,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // Compute derived properties from the register classes computeRegisterProperties(); - // Set shifts properties - setShiftAmountType(MVT::i64); - // Provide all sorts of operation actions setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 51d2df3a3008..30192420dcb6 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -57,6 +57,8 @@ namespace llvm { public: explicit SystemZTargetLowering(SystemZTargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } + /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 1cac07a0e10a..8fe549ba3126 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } + // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". + if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } // FIXME: Hack to handle recognize s{hr,ar,hl} $1, . Canonicalize to // "shift ". diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 691e2d7204ab..f7777561b6a7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, switch (insn.displacementSize) { default: break; - case 8: + case 1: type = TYPE_MOFFS8; break; - case 16: + case 2: type = TYPE_MOFFS16; break; - case 32: + case 4: type = TYPE_MOFFS32; break; - case 64: + case 8: type = TYPE_MOFFS64; break; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 4f4fbcdd394c..d0dc8b56aea5 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -399,7 +399,7 @@ struct InternalInstruction { /* The segment override type */ SegmentOverride segmentOverride; - /* Sizes of various critical pieces of data */ + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index c10e1709f667..abd1515cf5d7 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1879,39 +1879,71 @@ _add32carry: //===---------------------------------------------------------------------===// -This: -char t(char c) { - return c/3; +The hot loop of 256.bzip2 contains code that looks a bit like this: + +int foo(char *P, char *Q, int x, int y) { + if (P[0] != Q[0]) + return P[0] < Q[0]; + if (P[1] != Q[1]) + return P[1] < Q[1]; + if (P[2] != Q[2]) + return P[2] < Q[2]; + return P[3] < Q[3]; } -Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer +In the real code, we get a lot more wrong than this. However, even in this +code we generate: -_t: ## @t - movslq %edi, %rax - imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB - shrq $32, %rcx - addl %ecx, %eax - movl %eax, %ecx - shrl $31, %ecx - shrl %eax - addl %ecx, %eax - movsbl %al, %eax +_foo: ## @foo +## BB#0: ## %entry + movb (%rsi), %al + movb (%rdi), %cl + cmpb %al, %cl + je LBB0_2 +LBB0_1: ## %if.then + cmpb %al, %cl + jmp LBB0_5 +LBB0_2: ## %if.end + movb 1(%rsi), %al + movb 1(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#3: ## %if.end38 + movb 2(%rsi), %al + movb 2(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#4: ## %if.end60 + movb 3(%rdi), %al + cmpb 3(%rsi), %al +LBB0_5: ## %if.end60 + setl %al + movzbl %al, %eax ret -GCC gets: +Note that we generate jumps to LBB0_1 which does a redundant compare. The +redundant compare also forces the register values to be live, which prevents +folding one of the loads into the compare. In contrast, GCC 4.2 produces: -_t: - movl $86, %eax - imulb %dil - shrw $8, %ax - sarb $7, %dil - subb %dil, %al - movsbl %al,%eax +_foo: + movzbl (%rsi), %eax + cmpb %al, (%rdi) + jne L10 +L12: + movzbl 1(%rsi), %eax + cmpb %al, 1(%rdi) + jne L10 + movzbl 2(%rsi), %eax + cmpb %al, 2(%rdi) + jne L10 + movzbl 3(%rdi), %eax + cmpb 3(%rsi), %al +L10: + setl %al + movzbl %al, %eax ret -which is nicer. This also happens for int, not just char. +which is "perfect". //===---------------------------------------------------------------------===// - - diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9d42ac2e470c..6fa928462b28 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; - // Can't handle TLS or DLLImport. + // Can't handle DLLImport. + if (GV->hasDLLImportLinkage()) + return false; + + // Can't handle TLS. if (const GlobalVariable *GVar = dyn_cast(GV)) - if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + if (GVar->isThreadLocal()) return false; // Okay, we've committed to selecting this global. Set up the basic address. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 27024b4e9e5a..2f49dbcebf3c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -56,10 +55,6 @@ using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt -Disable256Bit("disable-256bit", cl::Hidden, - cl::desc("Disable use of 256-bit vectors")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. - setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, else llvm_unreachable("Unknown argument type!"); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass, dl); + X86::GR64RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl); + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], - X86::VR128RegisterClass, dl); + X86::VR128RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -2760,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, - DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); + DAG.getConstant(NumBits, + TLI.getShiftAmountTy(SrcOp.getValueType())))); } SDValue @@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !Disable256Bit && + if (VT.getSizeInBits() > 256 && + Subtarget->hasAVX() && !ISD::isBuildVectorAllZeros(Op.getNode())) { SmallVector V; V.resize(NumElems); for (unsigned i = 0; i < NumElems; ++i) { V[i] = Op.getOperand(i); } - + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build the lower subvector. @@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DAG.getIntPtrConstant(Elt1 / 2)); if ((Elt1 & 1) == 0) InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt.getValueType()))); else if (Elt0 >= 0) InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt, DAG.getConstant(0xFF00, MVT::i16)); @@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, Elt0Src, DAG.getIntPtrConstant(Elt0 / 2)); if ((Elt0 & 1) != 0) InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt0.getValueType()))); else if (Elt1 >= 0) InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0, DAG.getConstant(0x00FF, MVT::i16)); @@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; - + if (CanFoldLoad) { if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); @@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue ScaledN2 = N2; if (Upper) ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / + DAG.getConstant(NumElems / (VT.getSizeInBits() / 128), N2.getValueType())); Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, @@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; + case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; + case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 419da3742cf8..6ec4a7de7558 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -159,16 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + /// PANDN - and with not'd value. PANDN, - + /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, - + PSIGNB, PSIGNW, PSIGND, + /// PBLENDVB - Variable blend PBLENDVB, - + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -212,7 +212,7 @@ namespace llvm { // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. @@ -248,6 +248,10 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, + VUNPCKLPS, + VUNPCKLPD, + VUNPCKLPSY, + VUNPCKLPDY, UNPCKHPS, UNPCKHPD, PUNPCKLBW, @@ -463,6 +467,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 344c14c112a0..0660072589e4 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -41,6 +41,8 @@ def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; +def MRM_D0 : Format<45>; +def MRM_D1 : Format<46>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ceb1b6539826..76a9b12b8aad 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, @@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, 16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, { X86::MAXPSrr, X86::MAXPSrm, 16 }, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 1d4420787273..fcb5a25104ac 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,8 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -577,6 +579,8 @@ namespace X86II { case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: return -1; } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 87dc4bece742..f832a7c85a8a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">; def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; + def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 1a58ba0f96ef..6a24d145c696 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +let Defs = [RDX, RAX], Uses = [RCX] in + def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; + +let Uses = [RDX, RAX, RCX] in + def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index e6dc74e65d79..0e3b5711f2b5 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitByte(0xF9, CurByte, OS); break; + case X86II::MRM_D0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD0, CurByte, OS); + break; + case X86II::MRM_D1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD1, CurByte, OS); + break; } // If there is a remaining operand, it must be a trailing immediate. Emit it diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index de768561f111..1ee73123bbc6 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and - // for all 64-bit targets. - if (isTargetDarwin() || isTargetLinux() || Is64Bit) + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both + // 32 and 64 bit) and for all 64-bit targets. + if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a119b43cd91..0a62a029554c 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,6 +166,8 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } + bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 828d6f92caf4..4817787d7515 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -42,9 +42,9 @@ using namespace llvm; const char *XCoreTargetLowering:: -getTargetNodeName(unsigned Opcode) const +getTargetNodeName(unsigned Opcode) const { - switch (Opcode) + switch (Opcode) { case XCoreISD::BL : return "XCoreISD::BL"; case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; @@ -77,7 +77,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Division is expensive setIntDivIsCheap(false); - setShiftAmountType(MVT::i32); setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::RegPressure); @@ -95,7 +94,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Stop the combiner recombining select and set_cc setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - + // 64bit setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i64, Custom); @@ -106,14 +105,14 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - + // Bit Manipulation setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); - + setOperationAction(ISD::TRAP, MVT::Other, Legal); - + // Jump tables. setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -122,7 +121,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Thread Local Storage setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - + // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -143,7 +142,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - + // Dynamic stack setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -163,7 +162,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) + switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -414,7 +413,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); DebugLoc DL = Op.getDebugLoc(); - + SDValue Base; int64_t Offset; if (!LD->isVolatile() && @@ -437,10 +436,10 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - + SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, LowAddr, MachinePointerInfo(), false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, @@ -453,7 +452,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + if (LD->getAlignment() == 2) { SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, LD->getPointerInfo(), MVT::i16, @@ -473,16 +472,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + // Lower to a call to __misaligned_load(BasePtr). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + std::pair CallResult = LowerCallTo(Chain, IntPtrTy, false, false, false, false, 0, CallingConv::C, false, @@ -515,7 +514,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); DebugLoc dl = Op.getDebugLoc(); - + if (ST->getAlignment() == 2) { SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, @@ -532,19 +531,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } - + // Lower to a call to __misaligned_store(BasePtr, Value). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + Entry.Node = Value; Args.push_back(Entry); - + std::pair CallResult = LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, @@ -722,7 +721,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const } DebugLoc dl = N->getDebugLoc(); - + // Extract components SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, MVT::i32)); @@ -732,7 +731,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const N->getOperand(1), DAG.getConstant(0, MVT::i32)); SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(1), DAG.getConstant(1, MVT::i32)); - + // Expand unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD : XCoreISD::LSUB; @@ -740,7 +739,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSL, RHSL, Zero); SDValue Lo(Carry.getNode(), 1); - + SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSH, RHSH, Carry); SDValue Hi(Ignored.getNode(), 1); @@ -761,8 +760,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Node->getOperand(1), MachinePointerInfo(V), false, false, 0); // Increment the pointer, VAList, to the next vararg - SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, - DAG.getConstant(VT.getSizeInBits(), + SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, + DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), @@ -781,20 +780,20 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const MachineFunction &MF = DAG.getMachineFunction(); XCoreFunctionInfo *XFI = MF.getInfo(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), MachinePointerInfo(), false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! + // Depths > 0 not supported yet! if (cast(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); - + MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo(); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, RegInfo->getFrameRegister(MF), MVT::i32); } @@ -919,7 +918,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy(), true)); SmallVector, 4> RegsToPass; @@ -944,8 +943,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } - - // Arguments that can be passed on register must be kept at + + // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -954,7 +953,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, int Offset = VA.getLocMemOffset(); - MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, + MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, Chain, Arg, DAG.getConstant(Offset/4, MVT::i32))); } @@ -963,16 +962,16 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because // all store nodes are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token + // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emited instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -986,7 +985,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); // XCoreBranchLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... + // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -994,7 +993,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Ops.push_back(Chain); Ops.push_back(Callee); - // Add argument registers to the end of the list so that they are + // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, @@ -1098,11 +1097,11 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize(); unsigned LRSaveSize = StackSlotSize; - + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); @@ -1139,12 +1138,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } - + if (isVarArg) { /* Argument registers */ static const unsigned ArgRegs[] = { @@ -1186,7 +1185,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, true)); } } - + return Chain; } @@ -1222,7 +1221,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); - // If this is the first return lowered for this function, add + // If this is the first return lowered for this function, add // the regs to the liveout set for the function. if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) @@ -1237,7 +1236,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are @@ -1265,7 +1264,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && "Unexpected instr type to insert"); - + // To "insert" a SELECT_CC instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to @@ -1273,7 +1272,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; - + // thisMBB: // ... // TrueVal = ... @@ -1296,7 +1295,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); @@ -1304,10 +1303,10 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; - + // Update machine-CFG edges BB->addSuccessor(sinkMBB); - + // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... @@ -1316,7 +1315,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, TII.get(XCore::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1354,7 +1353,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1377,7 +1376,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N0.getValueType(); // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set - if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { + if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1393,7 +1392,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1557,7 +1556,7 @@ static inline bool isImmUs4(int64_t val) /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool -XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, +XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); @@ -1568,7 +1567,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; } - + switch (Size) { case 1: // reg + imm @@ -1593,7 +1592,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // reg + reg<<2 return AM.Scale == 4 && AM.BaseOffs == 0; } - + return false; } @@ -1603,7 +1602,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::vector XCoreTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const + EVT VT) const { if (Constraint.size() != 1) return std::vector(); @@ -1611,9 +1610,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { default : break; case 'r': - return make_vector(XCore::R0, XCore::R1, XCore::R2, - XCore::R3, XCore::R4, XCore::R5, - XCore::R6, XCore::R7, XCore::R8, + return make_vector(XCore::R0, XCore::R1, XCore::R2, + XCore::R3, XCore::R4, XCore::R5, + XCore::R6, XCore::R7, XCore::R8, XCore::R9, XCore::R10, XCore::R11, 0); break; } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 7e5dd2e8e512..bb3f2cc038e7 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -20,11 +20,11 @@ #include "XCore.h" namespace llvm { - + // Forward delcarations class XCoreSubtarget; class XCoreTargetMachine; - + namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. @@ -38,16 +38,16 @@ namespace llvm { // dp relative address DPRelativeWrapper, - + // cp relative address CPRelativeWrapper, - + // Store word to stack STWSP, // Corresponds to retsp instruction RETSP, - + // Corresponds to LADD instruction LADD, @@ -74,13 +74,14 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - class XCoreTargetLowering : public TargetLowering + class XCoreTargetLowering : public TargetLowering { public: explicit XCoreTargetLowering(XCoreTargetMachine &TM); virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -91,10 +92,10 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const; - /// getTargetNodeName - This method returns the name of a target specific + /// getTargetNodeName - This method returns the name of a target specific // DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -108,7 +109,7 @@ namespace llvm { private: const XCoreTargetMachine &TM; const XCoreSubtarget &Subtarget; - + // Lower Operand helpers SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -148,12 +149,12 @@ namespace llvm { SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - + // Inline asm support std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 38cc734ce7c3..ecdd4cb63000 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -727,7 +727,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp, +// TODO setd, eet, eef, testwct, tinitpc, tinitdp, // tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in @@ -758,6 +758,14 @@ def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; +def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "getts $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>; + +def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "setpt res[$r], $val", + [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>; + def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "outct res[$r], $val", [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>; @@ -774,6 +782,11 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "out res[$r], $val", [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; +let Constraints = "$src = $dst" in +def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "outshr res[$r], $src", + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; + def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "inct $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; @@ -786,6 +799,11 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "in $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>; +let Constraints = "$src = $dst" in +def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "inshr $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; + def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "chkct res[$r], $val", [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; @@ -799,7 +817,7 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; // Two operand long -// TODO settw, setclk, setrdy, setpsc, endin, peek, +// TODO setclk, setrdy, setpsc, endin, peek, // getd, testlcl, tinitlr, getps, setps def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", @@ -813,13 +831,17 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; -def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val), +def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), "setc res[$r], $val", [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; +def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "settw res[$r], $val", + [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp -// setdp, setcp, setv, setev, kcall +// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp +// setdp, setcp, setev, kcall // dgetreg let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), @@ -859,20 +881,41 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), [(XCoreBranchLink GRRegs:$addr)]>; } +def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r), + "syncr res[$r]", + [(int_xcore_syncr GRRegs:$r)]>; + def FREER_1r : _F1R<(outs), (ins GRRegs:$r), "freer res[$r]", [(int_xcore_freer GRRegs:$r)]>; +let Uses=[R11] in +def SETV_1r : _F1R<(outs), (ins GRRegs:$r), + "setv res[$r], r11", + [(int_xcore_setv GRRegs:$r, R11)]>; + +def EEU_1r : _F1R<(outs), (ins GRRegs:$r), + "eeu res[$r]", + [(int_xcore_eeu GRRegs:$r)]>; + // Zero operand short -// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp +def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; + let Defs = [R11] in def GETID_0R : _F0R<(outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, + hasSideEffects = 1 in +def WAITEU_0R : _F0R<(outs), (ins), + "waiteu", + [(brind (int_xcore_waitevent))]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b6b6b84d9647..7986d1aca762 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1897,6 +1897,39 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateNot(And); } + // Canonicalize xor to the RHS. + if (match(Op0, m_Xor(m_Value(), m_Value()))) + std::swap(Op0, Op1); + + // A | ( A ^ B) -> A | B + // A | (~A ^ B) -> A | ~B + if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (Op0 == A || Op0 == B) + return BinaryOperator::CreateOr(A, B); + + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(B, B->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(A, A->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + + // A | ~(A | B) -> A | ~B + // A | ~(A ^ B) -> A | ~B + if (match(Op1, m_Not(m_Value(A)))) + if (BinaryOperator *B = dyn_cast(A)) + if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) && + Op1->hasOneUse() && (B->getOpcode() == Instruction::Or || + B->getOpcode() == Instruction::Xor)) { + Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : + B->getOperand(0); + Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8449f7b7982c..0e464507a7e4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -953,10 +953,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee->isDeclaration() && !isConvertible) return false; } - if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && - Callee->isDeclaration()) - return false; // Do not delete arguments unless we have a function body. - + if (Callee->isDeclaration()) { + // Do not delete arguments unless we have a function body. + if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) + return false; + + // If the callee is just a declaration, don't change the varargsness of the + // call. We don't want to introduce a varargs call where one doesn't + // already exist. + const PointerType *APTy = cast(CS.getCalledValue()->getType()); + if (FT->isVarArg()!=cast(APTy->getElementType())->isVarArg()) + return false; + } + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && !CallerPAL.isEmpty()) // In this case we have more arguments than the new function type, but we @@ -970,8 +979,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; } + // Okay, we decided that this is a safe thing to do: go ahead and start - // inserting cast instructions as necessary... + // inserting cast instructions as necessary. std::vector Args; Args.reserve(NumActualArgs); SmallVector attrVec; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 6d1d344a9296..753a558cfe83 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -78,7 +78,6 @@ bool LoopDeletion::IsLoopDead(Loop* L, SmallVector& exitingBlocks, SmallVector& exitBlocks, bool &Changed, BasicBlock *Preheader) { - BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock* exitBlock = exitBlocks[0]; // Make sure that all PHI entries coming from the loop are loop invariant. @@ -88,11 +87,21 @@ bool LoopDeletion::IsLoopDead(Loop* L, // of the loop. BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast(BI)) { - Value* incoming = P->getIncomingValueForBlock(exitingBlock); + Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + + // Make sure all exiting blocks produce the same incoming value for the exit + // block. If there are different incoming values for different exiting + // blocks, then it is impossible to statically determine which value should + // be used. + for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) + return false; + } + if (Instruction* I = dyn_cast(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; - + ++BI; } @@ -147,10 +156,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { if (exitBlocks.size() != 1) return false; - // Loops with multiple exits are too complicated to handle correctly. - if (exitingBlocks.size() != 1) - return false; - // Finally, we have to check that the loop really is dead. bool Changed = false; if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) @@ -166,7 +171,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. BasicBlock* exitBlock = exitBlocks[0]; - BasicBlock* exitingBlock = exitingBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't @@ -183,9 +187,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. + BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast(BI)) { P->replaceUsesOfWith(exitingBlock, preheader); + for (unsigned i = 1; i < exitingBlocks.size(); ++i) + P->removeIncomingValue(exitingBlocks[i]); ++BI; } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index d7fa149492bd..f8ce214750ac 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -31,6 +31,11 @@ // void foo(_Complex float *P) // for (i) { __real__(*P) = 0; __imag__(*P) = 0; } // +// We should enhance this to handle negative strides through memory. +// Alternatively (and perhaps better) we could rely on an earlier pass to force +// forward iteration through memory, which is generally better for cache +// behavior. Negative strides *do* happen for memset/memcpy loops. +// // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). // @@ -272,10 +277,17 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - // TODO: Could also handle negative stride here someday, that will require the - // validity check in mayLoopAccessLocation to be updated though. - if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + // TODO: Could also handle negative stride here someday, that will require + // the validity check in mayLoopAccessLocation to be updated though. + // Enable this to print exact negative strides. + if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { + dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; + dbgs() << "BB: " << *SI->getParent(); + } + return false; + } // See if we can optimize just this store in isolation. if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index ec45b71dd368..9f136d4e3077 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -25,13 +25,14 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Config/config.h" +#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! using namespace llvm; STATISTIC(NumSimplified, "Number of library calls simplified"); @@ -1369,6 +1370,8 @@ namespace { /// This pass optimizes well known library functions from libc and libm. /// class SimplifyLibCalls : public FunctionPass { + TargetLibraryInfo *TLI; + StringMap Optimizations; // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr; @@ -1385,7 +1388,7 @@ namespace { SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; PutsOpt Puts; - + bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification @@ -1402,14 +1405,20 @@ namespace { void setDoesNotAlias(Function &F, unsigned n); bool doInitialization(Module &M); + void inferPrototypeAttributes(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); } }; - char SimplifyLibCalls::ID = 0; } // end anonymous namespace. -INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", - "Simplify well-known library calls", false, false) +char SimplifyLibCalls::ID = 0; + +INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false) // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -1441,9 +1450,9 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strcspn"] = &StrCSpn; Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; - Optimizations["memcpy"] = &MemCpy; + if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; - Optimizations["memset"] = &MemSet; + if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet; // _chk variants of String and Memory LibCall Optimizations. Optimizations["__strcpy_chk"] = &StrCpyChk; @@ -1506,6 +1515,8 @@ void SimplifyLibCalls::InitOptimizations() { /// runOnFunction - Top level algorithm. /// bool SimplifyLibCalls::runOnFunction(Function &F) { + TLI = &getAnalysis(); + if (Optimizations.empty()) InitOptimizations(); @@ -1597,688 +1608,654 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { } } + +void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { + const FunctionType *FTy = F.getFunctionType(); + + StringRef Name = F.getName(); + switch (Name[0]) { + case 's': + if (Name == "strlen") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "strchr" || + Name == "strrchr") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + } else if (Name == "strcpy" || + Name == "stpcpy" || + Name == "strcat" || + Name == "strtol" || + Name == "strtod" || + Name == "strtof" || + Name == "strtoul" || + Name == "strtoll" || + Name == "strtold" || + Name == "strncat" || + Name == "strncpy" || + Name == "strtoull") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "strxfrm") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "strcmp" || + Name == "strspn" || + Name == "strncmp" || + Name == "strcspn" || + Name == "strcoll" || + Name == "strcasecmp" || + Name == "strncasecmp") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "strstr" || + Name == "strpbrk") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "strtok" || + Name == "strtok_r") { + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "scanf" || + Name == "setbuf" || + Name == "setvbuf") { + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "strdup" || + Name == "strndup") { + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } else if (Name == "stat" || + Name == "sscanf" || + Name == "sprintf" || + Name == "statvfs") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "snprintf") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + } else if (Name == "setitimer") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + setDoesNotCapture(F, 3); + } else if (Name == "system") { + if (FTy->getNumParams() != 1 || + !FTy->getParamType(0)->isPointerTy()) + return; + // May throw; "system" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + } + break; + case 'm': + if (Name == "malloc") { + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "memcmp") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "memchr" || + Name == "memrchr") { + if (FTy->getNumParams() != 3) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + } else if (Name == "modf" || + Name == "modff" || + Name == "modfl" || + Name == "memcpy" || + Name == "memccpy" || + Name == "memmove") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "memalign") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotAlias(F, 0); + } else if (Name == "mkdir" || + Name == "mktime") { + if (FTy->getNumParams() == 0 || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'r': + if (Name == "realloc") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } else if (Name == "read") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return; + // May throw; "read" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + } else if (Name == "rmdir" || + Name == "rewind" || + Name == "remove" || + Name == "realpath") { + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "rename" || + Name == "readlink") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'w': + if (Name == "write") { + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return; + // May throw; "write" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + } + break; + case 'b': + if (Name == "bcopy") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "bcmp") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "bzero") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'c': + if (Name == "calloc") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "chmod" || + Name == "chown" || + Name == "ctermid" || + Name == "clearerr" || + Name == "closedir") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'a': + if (Name == "atoi" || + Name == "atol" || + Name == "atof" || + Name == "atoll") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + } else if (Name == "access") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'f': + if (Name == "fopen") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "fdopen") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 2); + } else if (Name == "feof" || + Name == "free" || + Name == "fseek" || + Name == "ftell" || + Name == "fgetc" || + Name == "fseeko" || + Name == "ftello" || + Name == "fileno" || + Name == "fflush" || + Name == "fclose" || + Name == "fsetpos" || + Name == "flockfile" || + Name == "funlockfile" || + Name == "ftrylockfile") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "ferror") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setOnlyReadsMemory(F); + } else if (Name == "fputc" || + Name == "fstat" || + Name == "frexp" || + Name == "frexpf" || + Name == "frexpl" || + Name == "fstatvfs") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "fgets") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + } else if (Name == "fread" || + Name == "fwrite") { + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 4); + } else if (Name == "fputs" || + Name == "fscanf" || + Name == "fprintf" || + Name == "fgetpos") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'g': + if (Name == "getc" || + Name == "getlogin_r" || + Name == "getc_unlocked") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "getenv") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + } else if (Name == "gets" || + Name == "getchar") { + setDoesNotThrow(F); + } else if (Name == "getitimer") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "getpwnam") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'u': + if (Name == "ungetc") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "uname" || + Name == "unlink" || + Name == "unsetenv") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "utime" || + Name == "utimes") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'p': + if (Name == "putc") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "puts" || + Name == "printf" || + Name == "perror") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "pread" || + Name == "pwrite") { + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return; + // May throw; these are valid pthread cancellation points. + setDoesNotCapture(F, 2); + } else if (Name == "putchar") { + setDoesNotThrow(F); + } else if (Name == "popen") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "pclose") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'v': + if (Name == "vscanf") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "vsscanf" || + Name == "vfscanf") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "valloc") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "vprintf") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "vfprintf" || + Name == "vsprintf") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "vsnprintf") { + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + } + break; + case 'o': + if (Name == "open") { + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + } else if (Name == "opendir") { + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } + break; + case 't': + if (Name == "tmpfile") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "times") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'h': + if (Name == "htonl" || + Name == "htons") { + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + } + break; + case 'n': + if (Name == "ntohl" || + Name == "ntohs") { + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + } + break; + case 'l': + if (Name == "lstat") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "lchown") { + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'q': + if (Name == "qsort") { + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return; + // May throw; places call through function pointer. + setDoesNotCapture(F, 4); + } + break; + case '_': + if (Name == "__strdup" || + Name == "__strndup") { + if (FTy->getNumParams() < 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } else if (Name == "__strtok_r") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "_IO_getc") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "_IO_putc") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } + break; + case 1: + if (Name == "\1__isoc99_scanf") { + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "\1stat64" || + Name == "\1lstat64" || + Name == "\1statvfs64" || + Name == "\1__isoc99_sscanf") { + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "\1fopen64") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "\1fseeko64" || + Name == "\1ftello64") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "\1tmpfile64") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "\1fstat64" || + Name == "\1fstatvfs64") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "\1open64") { + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + } + break; + } +} + /// doInitialization - Add attributes to well-known functions. /// bool SimplifyLibCalls::doInitialization(Module &M) { Modified = false; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Function &F = *I; - if (!F.isDeclaration()) - continue; - - if (!F.hasName()) - continue; - - const FunctionType *FTy = F.getFunctionType(); - - StringRef Name = F.getName(); - switch (Name[0]) { - case 's': - if (Name == "strlen") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strchr" || - Name == "strrchr") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "strcpy" || - Name == "stpcpy" || - Name == "strcat" || - Name == "strtol" || - Name == "strtod" || - Name == "strtof" || - Name == "strtoul" || - Name == "strtoll" || - Name == "strtold" || - Name == "strncat" || - Name == "strncpy" || - Name == "strtoull") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strxfrm") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strcmp" || - Name == "strspn" || - Name == "strncmp" || - Name == "strcspn" || - Name == "strcoll" || - Name == "strcasecmp" || - Name == "strncasecmp") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strstr" || - Name == "strpbrk") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strtok" || - Name == "strtok_r") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "scanf" || - Name == "setbuf" || - Name == "setvbuf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strdup" || - Name == "strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "stat" || - Name == "sscanf" || - Name == "sprintf" || - Name == "statvfs") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "snprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } else if (Name == "setitimer") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - } else if (Name == "system") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - // May throw; "system" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - case 'm': - if (Name == "malloc") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "memcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "memchr" || - Name == "memrchr") { - if (FTy->getNumParams() != 3) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "modf" || - Name == "modff" || - Name == "modfl" || - Name == "memcpy" || - Name == "memccpy" || - Name == "memmove") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "memalign") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotAlias(F, 0); - } else if (Name == "mkdir" || - Name == "mktime") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'r': - if (Name == "realloc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "read") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - continue; - // May throw; "read" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } else if (Name == "rmdir" || - Name == "rewind" || - Name == "remove" || - Name == "realpath") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "rename" || - Name == "readlink") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'w': - if (Name == "write") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - continue; - // May throw; "write" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } - break; - case 'b': - if (Name == "bcopy") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bzero") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'c': - if (Name == "calloc") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "chmod" || - Name == "chown" || - Name == "ctermid" || - Name == "clearerr" || - Name == "closedir") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'a': - if (Name == "atoi" || - Name == "atol" || - Name == "atof" || - Name == "atoll") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "access") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'f': - if (Name == "fopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "fdopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - } else if (Name == "feof" || - Name == "free" || - Name == "fseek" || - Name == "ftell" || - Name == "fgetc" || - Name == "fseeko" || - Name == "ftello" || - Name == "fileno" || - Name == "fflush" || - Name == "fclose" || - Name == "fsetpos" || - Name == "flockfile" || - Name == "funlockfile" || - Name == "ftrylockfile") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "ferror") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - } else if (Name == "fputc" || - Name == "fstat" || - Name == "frexp" || - Name == "frexpf" || - Name == "frexpl" || - Name == "fstatvfs") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "fgets") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - } else if (Name == "fread" || - Name == "fwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - } else if (Name == "fputs" || - Name == "fscanf" || - Name == "fprintf" || - Name == "fgetpos") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'g': - if (Name == "getc" || - Name == "getlogin_r" || - Name == "getc_unlocked") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "getenv") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "gets" || - Name == "getchar") { - setDoesNotThrow(F); - } else if (Name == "getitimer") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "getpwnam") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'u': - if (Name == "ungetc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "uname" || - Name == "unlink" || - Name == "unsetenv") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "utime" || - Name == "utimes") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'p': - if (Name == "putc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "puts" || - Name == "printf" || - Name == "perror") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "pread" || - Name == "pwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(1)->isPointerTy()) - continue; - // May throw; these are valid pthread cancellation points. - setDoesNotCapture(F, 2); - } else if (Name == "putchar") { - setDoesNotThrow(F); - } else if (Name == "popen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "pclose") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'v': - if (Name == "vscanf") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vsscanf" || - Name == "vfscanf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "valloc") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "vprintf") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vfprintf" || - Name == "vsprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "vsnprintf") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } - break; - case 'o': - if (Name == "open") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } else if (Name == "opendir") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } - break; - case 't': - if (Name == "tmpfile") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "times") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'h': - if (Name == "htonl" || - Name == "htons") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'n': - if (Name == "ntohl" || - Name == "ntohs") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'l': - if (Name == "lstat") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "lchown") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'q': - if (Name == "qsort") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(3)->isPointerTy()) - continue; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - } - break; - case '_': - if (Name == "__strdup" || - Name == "__strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "__strtok_r") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "_IO_getc") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "_IO_putc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } - break; - case 1: - if (Name == "\1__isoc99_scanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1stat64" || - Name == "\1lstat64" || - Name == "\1statvfs64" || - Name == "\1__isoc99_sscanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fopen64") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fseeko64" || - Name == "\1ftello64") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1tmpfile64") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "\1fstat64" || - Name == "\1fstatvfs64") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "\1open64") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - } + if (F.isDeclaration() && F.hasName()) + inferPrototypeAttributes(F); } return Modified; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 063c76e9522c..3f789fa86589 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -262,12 +262,13 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { /// areAllUsesEqual - Check whether the uses of a value are all the same. /// This is similar to Instruction::hasOneUse() except this will also return -/// true when there are multiple uses that all refer to the same value. +/// true when there are no uses or multiple uses that all refer to the same +/// value. static bool areAllUsesEqual(Instruction *I) { Value::use_iterator UI = I->use_begin(); Value::use_iterator UE = I->use_end(); if (UI == UE) - return false; + return true; User *TheUse = *UI; for (++UI; UI != UE; ++UI) { @@ -281,31 +282,24 @@ static bool areAllUsesEqual(Instruction *I) { /// dead PHI node, due to being a def-use chain of single-use nodes that /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. Return true if the PHI node is actually deleted. +/// too, recursively. Return true if a change was made. bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { - // We can remove a PHI if it is on a cycle in the def-use graph - // where each node in the cycle has degree one, i.e. only one use, - // and is an instruction with no side effects. - if (!areAllUsesEqual(PN)) - return false; + SmallPtrSet Visited; + for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); + I = cast(*I->use_begin())) { + if (I->use_empty()) + return RecursivelyDeleteTriviallyDeadInstructions(I); - bool Changed = false; - SmallPtrSet PHIs; - PHIs.insert(PN); - for (Instruction *J = cast(*PN->use_begin()); - areAllUsesEqual(J) && !J->mayHaveSideEffects(); - J = cast(*J->use_begin())) - // If we find a PHI more than once, we're on a cycle that + // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. - if (PHINode *JP = dyn_cast(J)) - if (!PHIs.insert(JP)) { - // Break the cycle and delete the PHI and its operands. - JP->replaceAllUsesWith(UndefValue::get(JP->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(JP); - Changed = true; - break; - } - return Changed; + if (!Visited.insert(I)) { + // Break the cycle and delete the instruction and its operands. + I->replaceAllUsesWith(UndefValue::get(I->getType())); + (void)RecursivelyDeleteTriviallyDeadInstructions(I); + return true; + } + } + return false; } /// SimplifyInstructionsInBlock - Scan the specified basic block and try to diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index e6a4373c495b..778885723e66 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -35,6 +35,7 @@ #include "llvm/Metadata.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/DenseMap.h" @@ -190,7 +191,7 @@ namespace { /// std::vector Allocas; DominatorTree &DT; - DIFactory *DIF; + DIBuilder *DIB; /// AST - An AliasSetTracker object to update. If null, don't update it. /// @@ -235,9 +236,9 @@ namespace { public: PromoteMem2Reg(const std::vector &A, DominatorTree &dt, AliasSetTracker *ast) - : Allocas(A), DT(dt), DIF(0), AST(ast) {} + : Allocas(A), DT(dt), DIB(0), AST(ast) {} ~PromoteMem2Reg() { - delete DIF; + delete DIB; } void run(); @@ -951,9 +952,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return; - if (!DIF) - DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); - Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); // Propagate any debug metadata from the store onto the dbg.value. diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index fb660dbfac10..c6708857cb56 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -247,6 +247,11 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (PBB->getFirstNonPHIOrDbg() != I) return false; break; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast(I)->hasAllConstantIndices()) + return false; + break; case Instruction::Add: case Instruction::Sub: case Instruction::And: -- cgit v1.2.3