author     Dimitry Andric <dim@FreeBSD.org>    2010-09-17 15:48:55 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2010-09-17 15:48:55 +0000
commit     d39c594d39df7f283c2fb8a704a3f31c501180d9 (patch)
tree       36453626c792cccd91f783a38a169d610a6b9db9 /lib/Target
parent     6144c1de6a7674dad94290650e4e14f24d42e421 (diff)
download   src-d39c594d39df7f283c2fb8a704a3f31c501180d9.tar.gz
           src-d39c594d39df7f283c2fb8a704a3f31c501180d9.zip
Vendor import of llvm r114020 (from the release_28 branch):  (tag: vendor/llvm/llvm-r114020)
Notes:
    svn path=/vendor/llvm/dist/; revision=212793
    svn path=/vendor/llvm/llvm-r114020/; revision=212794; tag=vendor/llvm/llvm-r114020
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.h | 60
-rw-r--r--  lib/Target/ARM/ARM.td | 76
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 20
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp) | 171
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 134
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 81
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 563
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 29
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 4
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 72
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 96
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 359
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 665
-rw-r--r--  lib/Target/ARM/ARMGlobalMerge.cpp | 212
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 371
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 635
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 42
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 296
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 406
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 627
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 19
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 730
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 76
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 139
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp) | 0
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.h (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.h) | 0
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 11
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 160
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 22
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 1
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 245
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 66
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 2
-rw-r--r--  lib/Target/ARM/AsmPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 10
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 13
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 183
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 66
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 227
-rw-r--r--  lib/Target/ARM/Makefile | 5
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 147
-rw-r--r--  lib/Target/ARM/README.txt | 42
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 279
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 22
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 68
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 14
-rw-r--r--  lib/Target/Alpha/AlphaBranchSelector.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 26
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 6
-rw-r--r--  lib/Target/Alpha/AlphaLLRP.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 5
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 28
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 7
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 12
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td | 24
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 44
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 105
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 142
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 6
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 156
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td | 6
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 11
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 5
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.td | 2
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 17
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/MBlaze/MBlaze.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeCallingConv.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFPU.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFSL.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFormats.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 35
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 6
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsics.td | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule.td | 2
-rw-r--r--  lib/Target/MSIL/CMakeLists.txt | 3
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 1706
-rw-r--r--  lib/Target/MSIL/MSILWriter.h | 258
-rw-r--r--  lib/Target/MSIL/Makefile | 16
-rw-r--r--  lib/Target/MSIL/README.TXT | 26
-rw-r--r--  lib/Target/MSIL/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp | 26
-rw-r--r--  lib/Target/MSIL/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/MSP430/MSP430BranchSelector.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 23
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 12
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 7
-rw-r--r--  lib/Target/Mangler.cpp | 3
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 20
-rw-r--r--  lib/Target/Mips/Mips.td | 2
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 2
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 11
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 2
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 47
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 6
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 5
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsSchedule.td | 2
-rw-r--r--  lib/Target/PIC16/CMakeLists.txt | 2
-rw-r--r--  lib/Target/PIC16/PIC16.h | 7
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 10
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.h | 5
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 15
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16MemSelOpt.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp | 5
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Overlay.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.cpp | 9
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.h | 5
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 7
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 39
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 82
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 19
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 22
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 5
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 4
-rw-r--r--  lib/Target/README.txt | 16
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Sparc/FPMover.cpp | 2
-rw-r--r--  lib/Target/Sparc/Sparc.td | 2
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 40
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 6
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 12
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 5
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 25
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 9
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 7
-rw-r--r--  lib/Target/TargetData.cpp | 62
-rw-r--r--  lib/Target/TargetMachine.cpp | 20
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 274
-rw-r--r--  lib/Target/X86/AsmPrinter/CMakeLists.txt | 3
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 5
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 3
-rw-r--r--  lib/Target/X86/AsmPrinter/X86InstComments.cpp | 232
-rw-r--r--  lib/Target/X86/AsmPrinter/X86InstComments.h | 25
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 5
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 4
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 10
-rw-r--r--  lib/Target/X86/README-FPStack.txt | 4
-rw-r--r--  lib/Target/X86/README-SSE.txt | 42
-rw-r--r--  lib/Target/X86/README.txt | 104
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp | 2
-rw-r--r--  lib/Target/X86/X86.h | 5
-rw-r--r--  lib/Target/X86/X86.td | 4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 45
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp (renamed from lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp) | 51
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h (renamed from lib/Target/X86/AsmPrinter/X86AsmPrinter.h) | 6
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 32
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 119
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 29
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 473
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 153
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 37
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1207
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 58
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 156
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 60
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 6
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 33
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 80
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 692
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 30
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 145
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 2
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 1945
-rw-r--r--  lib/Target/X86/X86MCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 57
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp (renamed from lib/Target/X86/AsmPrinter/X86MCInstLower.cpp) | 119
-rw-r--r--  lib/Target/X86/X86MCInstLower.h (renamed from lib/Target/X86/AsmPrinter/X86MCInstLower.h) | 13
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 103
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 9
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 17
-rw-r--r--  lib/Target/X86/X86ShuffleDecode.h | 155
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 10
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 8
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 44
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 12
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 2
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 21
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 29
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 12
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 5
217 files changed, 10107 insertions, 7598 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 14825a785649..271ca44c2b69 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -30,22 +30,22 @@ class formatted_raw_ostream;
namespace ARMCC {
// The CondCodes constants map directly to the 4-bit encoding of the
// condition field for predicated instructions.
- enum CondCodes {
- EQ,
- NE,
- HS,
- LO,
- MI,
- PL,
- VS,
- VC,
- HI,
- LS,
- GE,
- LT,
- GT,
- LE,
- AL
+  enum CondCodes { // Meaning (integer)          Meaning (floating-point)
+    EQ,            // Equal                      Equal
+    NE,            // Not equal                  Not equal, or unordered
+    HS,            // Carry set                  >, ==, or unordered
+    LO,            // Carry clear                Less than
+    MI,            // Minus, negative            Less than
+    PL,            // Plus, positive or zero     >, ==, or unordered
+    VS,            // Overflow                   Unordered
+    VC,            // No overflow                Not unordered
+    HI,            // Unsigned higher            Greater than, or unordered
+    LS,            // Unsigned lower or same     Less than or equal
+    GE,            // Greater than or equal      Greater than or equal
+    LT,            // Less than                  Less than, or unordered
+    GT,            // Greater than               Greater than
+    LE,            // Less than or equal         <, ==, or unordered
+    AL             // Always (unconditional)     Always (unconditional)
};
inline static CondCodes getOppositeCondition(CondCodes CC) {
@@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory option");
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
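The ARM.h hunk above introduces the ARM_MB::MemBOpt encodings and the MemBOptToString helper that the new printMemBOption printer (added later in ARMAsmPrinter.cpp) relies on. The following is an illustrative standalone sketch only, not part of the imported sources: the ARM_MB_Sketch namespace and the main driver are invented for the example, but the 4-bit option-to-mnemonic mapping mirrors the enum added in this hunk.

#include <cstdio>

namespace ARM_MB_Sketch {
  // Mirror of the barrier option encodings added to ARM.h in this import.
  enum MemBOpt { ST = 14, ISH = 11, ISHST = 10, NSH = 7,
                 NSHST = 6, OSH = 3, OSHST = 2 };

  // Decode a 4-bit barrier option field into its assembly mnemonic.
  inline const char *MemBOptToString(unsigned val) {
    switch (val) {
    case ST:    return "st";
    case ISH:   return "ish";
    case ISHST: return "ishst";
    case NSH:   return "nsh";
    case NSHST: return "nshst";
    case OSH:   return "osh";
    case OSHST: return "oshst";
    default:    return "<unknown>";
    }
  }
}

int main() {
  // Encoding 11 is the "inner shareable" option, printed as "ish" in asm.
  std::printf("option 11 -> %s\n", ARM_MB_Sketch::MemBOptToString(11));
  return 0;
}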
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index fa64d6c2a4b4..d6a8f19724dc 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,20 +20,6 @@ include "llvm/Target/Target.td"
// ARM Subtarget features.
//
-def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
- "ARM v4T">;
-def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
- "ARM v5T">;
-def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
- "ARM v5TE, v5TEj, v5TExp">;
-def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
- "ARM v6">;
-def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
- "ARM v6t2">;
-def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
- "ARM v7A">;
-def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
- "ARM v7M">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
"Enable VFP2 instructions">;
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
"Enable NEON instructions">;
def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
"Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
-def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
// others as well. We should do more benchmarking and confirm one way or
// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
- "Disable VFP MAC instructions">;
+def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
+ "Disable VFP MAC instructions">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
"true",
"Use NEON for single precision FP">;
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+
+// ARM architectures.
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
+ "ARM v6m",
+ [FeatureNoARM, FeatureDB]>;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+ "ARM v6t2",
+ [FeatureThumb2]>;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A",
+ [FeatureThumb2, FeatureNEON, FeatureDB]>;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M",
+ [FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
+ [ArchV7A, FeatureHasSlowVMLx,
+ FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
-def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
-def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+ [ArchV7A, FeatureT2XtPk]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [ArchV7M]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 92a13f1d751c..db481005b3a4 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -458,6 +458,7 @@ namespace ARM_AM {
// IB - Increment before
// DA - Decrement after
// DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
return (AMSubMode)(Mode & 0x7);
@@ -477,14 +478,6 @@ namespace ARM_AM {
//
// The first operand is always a Reg. The second operand encodes the
// operation in bit 8 and the immediate in bits 0-7.
- //
- // This is also used for FP load/store multiple ops. The second operand
- // encodes the number of registers (or 2 times the number of registers
- // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
- // following two sub-modes:
- //
- // IA - Increment after
- // DB - Decrement before
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
@@ -498,17 +491,6 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
- /// VSTM instructions.
- static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
- assert((SubMode == ia || SubMode == db) &&
- "Illegal addressing mode 5 sub-mode!");
- return ((int)SubMode << 8) | Offset;
- }
- static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
- return (AMSubMode)((AM5Opc >> 8) & 0x7);
- }
-
//===--------------------------------------------------------------------===//
// Addressing Mode #6
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 946f4744f5bb..6cfd5961149f 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,7 +17,7 @@
#include "ARMBuildAttrs.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstPrinter.h"
+#include "AsmPrinter/ARMInstPrinter.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMMCInstLower.h"
#include "ARMTargetMachine.h"
@@ -47,6 +47,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -56,6 +57,15 @@ static cl::opt<bool>
EnableMCInst("enable-arm-mcinst-printer", cl::Hidden,
cl::desc("enable experimental asmprinter gunk in the arm backend"));
+namespace llvm {
+ namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+ }
+}
+
namespace {
class ARMAsmPrinter : public AsmPrinter {
@@ -80,9 +90,9 @@ namespace {
virtual const char *getPassName() const {
return "ARM Assembly Printer";
}
-
+
void printInstructionThroughMCStreamer(const MachineInstr *MI);
-
+
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
const char *Modifier = 0);
@@ -110,8 +120,12 @@ namespace {
void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O,
const char *Modifier = 0);
- void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum,
- raw_ostream &O);
+ void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
@@ -190,12 +204,32 @@ namespace {
virtual void EmitInstruction(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &F);
-
+
virtual void EmitConstantPool() {} // we emit constant pools customly!
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
@@ -208,7 +242,7 @@ namespace {
EmitMachineConstantPoolValue(MCPV, OS);
OutStreamer.EmitRawText(OS.str());
}
-
+
void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV,
raw_ostream &O) {
switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) {
@@ -234,7 +268,7 @@ namespace {
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
O << *Sym;
-
+
MachineModuleInfoMachO &MMIMachO =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitRawText(OS.str());
}
}
-
+
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
O << *GetExternalSymbolSymbol(MO.getSymbolName());
-
+
if (isCallOp && Subtarget->isTargetELF() &&
TM.getRelocationModel() == Reloc::PIC_)
O << "(PLT)";
@@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << " ";
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op,
O << "#" << lsb << ", #" << width;
}
+void
+ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
//===--------------------------------------------------------------------===//
void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op,
@@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
@@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-
+
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const char *JTEntryDirective = MAI->getData32bitsDirective();
@@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-
+
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
O << MAI->getData8bitsDirective();
else if (HalfWordOffset)
O << MAI->getData16bitsDirective();
-
+
if (ByteOffset || HalfWordOffset)
O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
else
@@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
return;
}
-
+
if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY)
EmitAlignment(2);
-
+
SmallString<128> Str;
raw_svector_ostream OS(Str);
if (MI->getOpcode() == ARM::DBG_VALUE) {
@@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstruction(MI, OS);
OutStreamer.EmitRawText(OS.str());
-
+
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction instead.
if (MI->getOpcode() == ARM::t2TBB)
@@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// avoid out-of-range branches that are due to a fundamental limitation of
// the way symbol offsets are encoded with the current Darwin ARM
// relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
+ const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(
getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextSection());
@@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
16, SectionKind::getText());
OutStreamer.SwitchSection(sect);
}
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
}
}
@@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1");
}
-
- if (FiniteOnlyFPMath())
+
+ if (NoInfsFPMath && NoNaNsFPMath)
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1");
else
@@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// LPC0:
// add r0, pc, r0
// This adds the address of LPC0 to r0.
-
+
// Emit the label.
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
@@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
-
-
+
+
// Form and emit the add.
MCInst AddInst;
AddInst.setOpcode(ARM::ADDrr);
@@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
EmitGlobalConstant(MCPE.Val.ConstVal);
-
+
return;
}
case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file.
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVi);
TmpInst.addOperand(MCOperand::CreateReg(DstReg));
TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1));
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
@@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out
OutStreamer.EmitInstruction(TmpInst);
}
- return;
+ return;
}
case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
// This is a hack that lowers as a two instruction sequence.
@@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::MOVi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(V1); // lower16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVTi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
TmpInst.addOperand(V2); // upper16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 49c16f3e0720..3a8bebe0dd24 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,9 +15,9 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
llvm_unreachable("Unknown or unset size field for instr!");
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
@@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool
-ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- switch (MI.getOpcode()) {
- default: break;
- case ARM::VMOVS:
- case ARM::VMOVD:
- case ARM::VMOVDneon:
- case ARM::VMOVQ:
- case ARM::VMOVQQ : {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- case ARM::MOVr:
- case ARM::MOVr_TC:
- case ARM::tMOVr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2gpr:
- case ARM::t2MOVr: {
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid ARM MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- }
-
- return false;
-}
-
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
@@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Align);
// tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
unsigned PCLabelId = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
if (ACPV->isGlobalValue())
NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
ARMCP::CPValue, 4);
@@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
else if (ACPV->isBlockAddress())
NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
@@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::CMPzri:
+ case ARM::t2CMPri:
+ case ARM::t2CMPzri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so
+/// that we can remove a "comparison with zero".
+bool ARMBaseInstrInfo::
+ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI;
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || !MO.isDef()) continue;
+
+ // This instruction modifies CPSR before the one we want to change. We
+ // can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::ADDri:
+ case ARM::SUBri:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ MI->RemoveOperand(5);
+ MachineInstrBuilder(MI)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
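The ARMBaseInstrInfo.cpp hunk above adds AnalyzeCompare and ConvertToSetZeroFlag, which let the backend drop a compare-against-zero when an earlier ADD/SUB in the same block can be rewritten to set CPSR itself, provided nothing between the two instructions writes CPSR. The following standalone sketch is not LLVM code; the Instr record and canAbsorbCompare are invented stand-ins that only model the clobber scan performed in the hunk.

#include <cstddef>
#include <string>
#include <vector>

// Invented stand-in for a MachineInstr: an opcode name plus whether the
// instruction defines (writes) CPSR.
struct Instr {
  std::string Opcode;
  bool DefinesCPSR;
};

// Returns true if the flag-setting candidate at DefIdx can absorb the
// compare-with-zero at CmpIdx: no instruction strictly between the two may
// write CPSR, mirroring the backward scan in ConvertToSetZeroFlag.
static bool canAbsorbCompare(const std::vector<Instr> &BB,
                             std::size_t DefIdx, std::size_t CmpIdx) {
  for (std::size_t I = DefIdx + 1; I < CmpIdx; ++I)
    if (BB[I].DefinesCPSR)
      return false;          // CPSR clobbered in between: keep the CMP.
  return true;               // Safe: e.g. SUBri becomes SUBS, CMPri removed.
}

int main() {
  std::vector<Instr> BB = {
    {"SUBri",  false},  // r0 = r1 - 4, candidate to become a SUBS
    {"LDRi12", false},  // unrelated load, leaves CPSR alone
    {"CMPri",  false},  // cmp r0, #0, removable when the scan succeeds
  };
  return canAbsorbCompare(BB, 0, 2) ? 0 : 1;
}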
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 89a2db74a75e..b4f4a33a70ad 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,11 +15,12 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
/// ARMII - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -97,44 +98,45 @@ namespace ARMII {
// Miscellaneous arithmetic instructions
ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
// Extend instructions
- ExtFrm = 13 << FormShift,
+ ExtFrm = 14 << FormShift,
// VFP formats
- VFPUnaryFrm = 14 << FormShift,
- VFPBinaryFrm = 15 << FormShift,
- VFPConv1Frm = 16 << FormShift,
- VFPConv2Frm = 17 << FormShift,
- VFPConv3Frm = 18 << FormShift,
- VFPConv4Frm = 19 << FormShift,
- VFPConv5Frm = 20 << FormShift,
- VFPLdStFrm = 21 << FormShift,
- VFPLdStMulFrm = 22 << FormShift,
- VFPMiscFrm = 23 << FormShift,
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
// Thumb format
- ThumbFrm = 24 << FormShift,
+ ThumbFrm = 25 << FormShift,
// Miscellaneous format
- MiscFrm = 25 << FormShift,
+ MiscFrm = 26 << FormShift,
// NEON formats
- NGetLnFrm = 26 << FormShift,
- NSetLnFrm = 27 << FormShift,
- NDupFrm = 28 << FormShift,
- NLdStFrm = 29 << FormShift,
- N1RegModImmFrm= 30 << FormShift,
- N2RegFrm = 31 << FormShift,
- NVCVTFrm = 32 << FormShift,
- NVDupLnFrm = 33 << FormShift,
- N2RegVShLFrm = 34 << FormShift,
- N2RegVShRFrm = 35 << FormShift,
- N3RegFrm = 36 << FormShift,
- N3RegVShFrm = 37 << FormShift,
- NVExtFrm = 38 << FormShift,
- NVMulSLFrm = 39 << FormShift,
- NVTBLFrm = 40 << FormShift,
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -198,7 +200,7 @@ namespace ARMII {
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
- const ARMSubtarget& Subtarget;
+ const ARMSubtarget &Subtarget;
protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
@@ -223,7 +225,7 @@ public:
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ bool AllowModify = false) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -262,12 +264,6 @@ public:
///
virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -341,6 +337,17 @@ public:
unsigned NumInstrs) const {
return NumInstrs && NumInstrs == 1;
}
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpValue) const;
+
+ /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
+ MachineInstr *CmpInstr) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 182bd9937145..eceafad63f17 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -40,13 +40,20 @@
#include "llvm/Support/CommandLine.h"
namespace llvm {
-cl::opt<bool>
-ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
- cl::desc("Reuse repeated frame index values"));
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
}
using namespace llvm;
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
TII(tii), STI(sti),
- FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
}
const unsigned*
@@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
- if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(ARM::FPSCR);
+ if (hasFP(MF))
Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
Reserved.set(ARM::R9);
@@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
case ARM::SP:
case ARM::PC:
return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- !MFI->hasVarSizedObjects());
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
}
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- (MFI->getMaxAlignment() > StackAlign) &&
- !MFI->hasVarSizedObjects());
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
}
bool ARMBaseRegisterInfo::
@@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
- if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
return Limit;
}
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
void
ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ // Spill the BasePtr if it's used.
+ if (hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(BasePtr);
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
bool ForceLRSpill = false;
if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
- unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
// Force LR to be spilled if the Thumb function size is > 2048. This enables
// use of BL to implement far jump. If it turns out that it's not needed
// then the branch fix up path will undo it.
@@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// slot of the previous FP. Also, if we have variable sized objects in the
// function, stack slot references will often be negative, and some of
// our instructions are positive-offset only, so conservatively consider
- // that case to want a spill slot (or register) as well.
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
- (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
- estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned
+unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
+// Provide a base+offset reference to an FI slot for debug info. It's the
+// same as what we use for resolving the code-gen references for now.
+// FIXME: This can go wrong when references are SP-relative and simple call
+// frames aren't used.
int
ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg,
+ int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
bool isFixed = MFI->isFixedObjectIndex(FI);
FrameReg = ARM::SP;
+ Offset += SPAdj;
if (AFI->isGPRCalleeSavedArea1Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (needsStackRealignment(MF)) {
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack pointer for locals.
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (needsStackRealignment(MF)) {
assert (hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = BasePtr;
}
- } else if (hasFP(MF) && AFI->hasStackFrame()) {
- if (isFixed || MFI->hasVarSizedObjects()) {
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function.
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) && "missing base pointer!");
+ // Use the base register since we have it.
+ FrameReg = BasePtr;
} else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited.
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out-of-range references.
if (FPOffset >= -255 && FPOffset < 0) {
FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
+ return FPOffset;
}
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = getFrameRegister(MF);
+ return FPOffset;
}
}
+ // Use the base pointer if we have one.
+ if (hasBasePointer(MF))
+ FrameReg = BasePtr;
return Offset;
}
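A quick illustration of the preference order implemented above (the frame indices and variables here are hypothetical; this is a sketch, not code from the patch): fixed incoming-argument objects resolve against the frame pointer, locals fall back to the base pointer once VLAs or realignment make SP and FP unreliable, and everything else stays SP-relative.

    unsigned FrameReg;
    // Fixed object in a function that realigns its stack: FP-relative.
    int ArgOff = RI.ResolveFrameIndexReference(MF, IncomingArgFI, FrameReg, 0);
    // Local in the same function when it also contains VLAs: resolved against
    // the base pointer, since SP moves with the VLAs and FP addresses the
    // unaligned incoming area.
    int LocOff = RI.ResolveFrameIndexReference(MF, LocalFI, FrameReg, 0);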
-
int
ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
@@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
case ARM::R5:
return ARM::R4;
case ARM::R7:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
case ARM::R9:
return isReservedReg(MF, ARM::R9) ? 0 : ARM::R8;
case ARM::R11:
@@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
case ARM::R4:
return ARM::R5;
case ARM::R6:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
case ARM::R8:
return isReservedReg(MF, ARM::R9) ? 0 : ARM::R9;
case ARM::R10:
@@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
// not required, we reserve argument space for call sites in the function
// immediately on entry to the current function. This eliminates the need for
// add/sub sp brackets around call sites. Returns true if the call frame is
// included as part of the stack frame.
bool ARMBaseRegisterInfo::
-hasReservedCallFrame(MachineFunction &MF) const {
+hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const {
// is not sufficient here since we still may reference some objects via SP
// even when FP is available in Thumb2 mode.
bool ARMBaseRegisterInfo::
-canSimplifyCallFramePseudos(MachineFunction &MF) const {
+canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative offsets, and i12 only positive ones, so
+ // choose the appropriate instruction based on the sign of Offset.
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
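As a worked example for the AddrMode5 case above (this only restates the decoding already shown): a VFP offset operand packs an 8-bit word count and an add/sub flag, so the byte offset recovered here is plus or minus imm8 * 4.

    // Sketch: equivalent stand-alone decoding of an addrmode5 immediate.
    static int64_t decodeAM5OffsetInBytes(unsigned AM5Imm) {
      int64_t Words = ARM_AM::getAM5Offset(AM5Imm);     // 0..255
      if (ARM_AM::getAM5Op(AM5Imm) == ARM_AM::sub)
        Words = -Words;
      return Words * 4;                                  // Scale == 4
    }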
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDR: case ARM::LDRH: case ARM::LDRB:
+ case ARM::STR: case ARM::STRH: case ARM::STRB:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer,
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
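To make the estimate above concrete (the numbers are purely illustrative): take an ARM-mode function with a 64-byte local block and a load whose incoming offset, relative to SP at function entry, is -40.

    int64_t Offset   = -40;                // incoming offset, negative for locals
    int64_t FPOffset = Offset - 8 - 80;    // R7+LR, then R8-R11/D8-D15: -128
    int64_t SPOffset = -Offset + 64 + 128; // local block + assumed spill slots: 232
    // Both guesses fit an ARM 12-bit load/store immediate with room to spare,
    // so needsFrameBaseReg returns false and no virtual base register is made.

Only when the local block grows large enough that neither guess is encodable does the function answer true and ask for a virtual base register.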
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for
+/// BaseReg to be a pointer to FrameIdx before insertion point I.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg,
+ int FrameIdx, int64_t Offset) const {
+ ARMFunctionInfo *AFI =
+ I->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineInstrBuilder MIB =
+ BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+}
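Roughly how these two hooks fit together with needsFrameBaseReg from the target-independent local stack-slot allocation pass (a sketch of the expected call sequence with hypothetical variable names, not code from this patch):

    if (TRI->needsFrameBaseReg(MI, Offset)) {
      // Materialize a base register anchored at the local block once...
      unsigned BaseReg = MRI.createVirtualRegister(ARM::GPRRegisterClass);
      TRI->materializeFrameBaseRegister(InsertPt, BaseReg, FrameIdx, 0);
      // ...then rewrite this (and, ideally, nearby) references against it.
      TRI->resolveFrameIndex(MachineBasicBlock::iterator(MI), BaseReg, Offset);
    }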
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative offsets, and i12 only positive ones, so
+ // choose the appropriate instruction based on the sign of Offset.
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
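Putting numbers on the checks above (derived from the NumBits and Scale values; listed for illustration), the byte offsets accepted per addressing mode are roughly:

    AddrMode2      (ARM ldr/str/ldrb)   : -4095 .. 4095
    AddrMode3      (ldrh/ldrsb/ldrd)    : -255 .. 255
    AddrMode5      (VFP vldr/vstr)      : -1020 .. 1020, multiples of 4
    AddrModeT2_i12 / _i8 (Thumb2)       : 0 .. 4095 positive, down to -255 negative
    AddrModeT1_s   (tLDRspi/tSTRspi)    : 0 .. 124, multiples of 4

So, for instance, a VFP spill 1024 bytes away from its base register is rejected here and ends up on the scratch-register path in eliminateFrameIndex.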
+
+void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
@@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned FrameReg;
- int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
- if (FrameReg != ARM::SP)
- SPAdj = 0;
- Offset += SPAdj;
+ int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
@@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
}
if (Done)
- return 0;
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
@@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
- if (Value) {
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
- }
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
@@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
- if (!ReuseFrameIndexVals)
- ScratchReg = 0;
}
- return ScratchReg;
}
/// Move iterator past the next bunch of callee save load / store ops for
@@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP)
+ AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
- // If we need dynamic stack realignment, do it here.
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure that, if we also have VLAs, we have a base pointer for frame access.
if (needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
@@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable-sized objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
+ .addReg(ARM::SP);
}
+
+ // If the frame has variable-sized objects, then the epilogue must restore
+ // SP from FP.
+ if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- bool HasFP = hasFP(MF);
- if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // the frame pointer stack slot, or the target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (HasFP ||
- AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()) {
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
+ BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
@@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
} else if (RetOpcode == ARM::TCRETURNriND) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
addReg(JumpTarget.getReg(), RegState::Kill);
- }
+ }
MachineInstr *NewMI = prior(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index f7ee0d5cc66d..fa2eb6c10498 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) {
}
}
-struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMBaseInstrInfo &TII;
const ARMSubtarget &STI;
@@ -52,6 +52,11 @@ protected:
/// FramePtr - ARM physical register used as frame ptr.
unsigned FramePtr;
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames, i.e., when we need a third base register, not just SP and FP,
+ /// due to variable-sized stack objects.
+ unsigned BasePtr;
+
// Can be only subclassed.
explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &STI);
@@ -102,9 +107,18 @@ public:
MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
bool cannotEliminateFrame(const MachineFunction &MF) const;
@@ -116,6 +130,8 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
// Exception handling queries.
@@ -144,16 +160,17 @@ public:
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- virtual bool hasReservedCallFrame(MachineFunction &MF) const;
- virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+ virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8fdb07f81626..293e32aa5376 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===//
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
- CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>,
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>,
CCIfType<[v2f64], CCAssignToStack<16, 8>>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 7895cb071922..b1a702f90cfc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -65,7 +65,7 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), JTI(0),
+ : MachineFunctionPass(ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
@@ -124,6 +124,8 @@ namespace {
void emitMiscArithInstruction(const MachineInstr &MI);
+ void emitSaturateInstruction(const MachineInstr &MI);
+
void emitBranchInstruction(const MachineInstr &MI);
void emitInlineJumpTable(unsigned JTIndex);
@@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::ArithMiscFrm:
emitMiscArithInstruction(MI);
break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
case ARMII::BrFrm:
emitBranchInstruction(MI);
break;
@@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX:
+ case ARM::BMOVPCRX:
+ case ARM::BXr9:
+ case ARM::BMOVPCRXr9: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
break;
}
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
@@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
// Encode shift_imm.
unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (TID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
Binary |= ShiftAmt << ARMII::ShiftShift;
emitWordLE(Binary);
}
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ TID.Opcode != ARM::SSAT16 &&
+ TID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (TID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
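A small worked example for the bit-position encoding above (standard SSAT/USAT encoding; shown for illustration, not taken from this patch): SSAT saturates to a signed width of 1 to 32 and encodes the width minus one, while USAT saturates to an unsigned width of 0 to 31 and encodes it directly.

    // SSAT r0, #16, r1 -> operand Pos = 16, encoded sat field = 15
    // USAT r0, #15, r1 -> operand Pos = 15, encoded sat field = 15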
+
void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
@@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// Set addressing mode by modifying bits U(23) and P(24)
const MachineOperand &MO = MI.getOperand(OpIdx++);
- Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
// Set bit W(21)
if (IsUpdating)
@@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// First register is encoded in Dd.
Binary |= encodeVFPRd(MI, OpIdx+2);
- // Number of registers are encoded in offset field.
+ // Count the number of registers.
unsigned NumRegs = 1;
for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 65a3da6f1617..60e923bd2c38 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,9 +18,9 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -165,7 +165,7 @@ namespace {
/// HasInlineAsm - True if the function contains inline assembly.
bool HasInlineAsm;
- const TargetInstrInfo *TII;
+ const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -173,7 +173,7 @@ namespace {
bool isThumb2;
public:
static char ID;
- ARMConstantIslands() : MachineFunctionPass(&ID) {}
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() {
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
MachineConstantPool &MCP = *MF.getConstantPool();
- TII = MF.getTarget().getInstrInfo();
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
@@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// constant pool users.
InitialFunctionScan(MF, CPEMIs);
CPEMIs.clear();
+ DEBUG(dumpBBs());
+
/// Remove dead constant pool entries.
RemoveUnusedCPEntries();
@@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
// Shrink 32-bit Thumb2 branch, load, and store instructions.
- if (isThumb2)
+ if (isThumb2 && !STI->prefers32BitThumb())
MadeChange |= OptimizeThumb2Instructions(MF);
// After a while, this might be made debug-only, but it is not expensive.
@@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
+ DEBUG(errs() << '\n'; dumpBBs());
+
BBSizes.clear();
BBOffsets.clear();
WaterList.clear();
@@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
case ARM::tBR_JTr:
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether the offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
MF.EnsureAlignment(2U);
if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
@@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- // Figure out how large the first NewMBB is. (It cannot
- // contain a constpool_entry or tablejump.)
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
-
unsigned OrigBBI = OrigBB->getNumber();
unsigned NewBBI = NewBB->getNumber();
- // Set the size of NewBB in BBSizes.
- BBSizes[NewBBI] = NewBBSize;
- // We removed instructions from UserMBB, subtract that off from its size.
- // Add 2 or 4 to the block to count the unconditional branch we added to it.
int delta = isThumb1 ? 2 : 4;
- BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
// ...and adjust BBOffsets for NewBB accordingly.
BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
// All BBOffsets following these blocks must be modified.
- AdjustBBOffsetsAfter(NewBB, delta);
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
return NewBB;
}
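A worked example of the padding bookkeeping above, assuming Thumb1 so the new unconditional branch is 2 bytes: if the tBR_JTr originally sat at a 4-byte-aligned offset, it used to need 2 bytes of padding before its jump table; the inserted branch shifts it by 2, the padding goes away, and the net growth is 2 - 2 = 0, so delta ends up 0. If it originally sat at a misaligned offset, there was no padding before but 2 bytes are needed now, so NewBB grows by 2 and everything after it shifts by delta = 2 + 2 = 4.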
@@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
}
// Thumb1 jump tables require padding. They should be at the end;
// following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether the offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
MachineInstr *ThumbJTMI = prior(MBB->end());
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
@@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
MachineBasicBlock::iterator MI = UserMI;
++MI;
unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
U.MaxDisp, U.NegOk, U.IsSoImm)) {
@@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
CPUIndex++;
}
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
}
+
DEBUG(errs() << "Split in middle of big block\n");
- NewMBB = SplitBlockBeforeInstr(prior(MI));
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
}
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9c62597b4323..fc2e3c3fadae 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,14 +19,21 @@
#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
+ // Constants for register spacing in NEON load/store instructions.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
public:
static char ID;
- ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -41,6 +48,10 @@ namespace {
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
};
char ARMExpandPseudo::ID = 0;
}
@@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
}
}
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
+ }
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (hasWriteBack) {
+ bool WBIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned WBReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ // For an instruction writing the odd subregs, add an implicit use of the
+ // super-register because the even subregs were loaded separately.
+ if (RegSpc == OddDblSpc)
+ MIB.addReg(DstReg, RegState::Implicit);
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ MI.eraseFromParent();
+}
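The three spacing cases above boil down to the following sub-register choice (only the first NumRegs entries are used; ExpandVST below applies the same mapping to its source operand):

    SingleSpc   -> dsub_0, dsub_1, dsub_2, dsub_3   (consecutive D registers)
    EvenDblSpc  -> dsub_0, dsub_2, dsub_4, dsub_6   (every other D register)
    OddDblSpc   -> dsub_1, dsub_3, dsub_5, dsub_7   (their interleaved partners)

For example, a VLD3q32_UPD expanded with EvenDblSpc and NumRegs == 3 defines dsub_0, dsub_2 and dsub_4 of the QQQQ super-register, and the matching odd pseudo fills dsub_1, dsub_3 and dsub_5.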
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+ if (hasWriteBack) {
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
+ }
+
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ MI.eraseFromParent();
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ bool ModifiedOp = true;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
+ default:
+ ModifiedOp = false;
+ break;
+
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- Modified = true;
break;
}
+ case ARM::MOVi32imm:
case ARM::t2MOVi32imm: {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
@@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
const MachineOperand &MO = MI.getOperand(1);
MachineInstrBuilder LO16, HI16;
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVi16 : ARM::t2MOVi16),
DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVTi16 : ARM::t2MOVTi16))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(DstReg);
@@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
HI16.addImm(Pred).addReg(PredReg);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
- Modified = true;
break;
}
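As a concrete illustration of the expansion above (the usual movw/movt split; a sketch, since the immediate-splitting code itself is outside this hunk): materializing 0x12345678 via ARM::MOVi32imm becomes a MOVi16 of the low half followed by a MOVTi16 of the high half.

    uint32_t Imm  = 0x12345678;
    uint16_t Lo16 = Imm & 0xffff; // 0x5678 -> MOVi16 (movw: sets low half, zeroes the rest)
    uint16_t Hi16 = Imm >> 16;    // 0x1234 -> MOVTi16 (movt: sets high half, keeps the rest)

The t2MOVi32imm case is identical except that it picks the Thumb2 encodings.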
@@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
- Modified = true;
}
+
+ case ARM::VLD1q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
+ case ARM::VLD1q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
+
+ case ARM::VLD2d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
+ case ARM::VLD2d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
+
+ case ARM::VLD3d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
+ case ARM::VLD3d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VLD4d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
+ case ARM::VLD4d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+
+ case ARM::VST1q8Pseudo:
+ ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo:
+ ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo:
+ ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo:
+ ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
+ case ARM::VST1q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
+
+ case ARM::VST2d8Pseudo:
+ ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo:
+ ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo:
+ ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo:
+ ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo:
+ ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo:
+ ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
+ case ARM::VST2d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
+
+ case ARM::VST3d8Pseudo:
+ ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo:
+ ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo:
+ ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo:
+ ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
+ case ARM::VST3d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VST4d8Pseudo:
+ ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo:
+ ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo:
+ ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo:
+ ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
+ case ARM::VST4d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
}
+
+ if (ModifiedOp)
+ Modified = true;
MBBI = NMBBI;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 000000000000..4892eae95833
--- /dev/null
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,665 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMFastISel("arm-fast-isel",
+ cl::desc("Turn on experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ const ARMFunctionInfo *AFI;
+
+ // Convenience variable to avoid checking all the time.
+ bool isThumb;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ virtual bool ARMSelectLoad(const Instruction *I);
+ virtual bool ARMSelectStore(const Instruction *I);
+ virtual bool ARMSelectBranch(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, EVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, EVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
+ bool ARMLoadAlloca(const Instruction *I);
+ bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg);
+ bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
+ bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg);
+
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+};
+
+} // end anonymous namespace
+
+// #include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets *CPSR to true if the instruction defines CPSR
+// instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+// If the machine instruction is predicable, go ahead and add the predicate
+// operands; if it needs default CC operands, add those as well.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate?
+ if (TII.isPredicable(MI))
+ AddDefaultPred(MIB);
+
+  // Do we optionally set a predicate?  If so, check whether the optional def
+  // is CPSR; all other optional defs in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ // TODO: This should be safe for fp because they're just bits from the
+ // Constant.
+ // TODO: Theoretically we could materialize fp constants with instructions
+ // from VFP3.
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ // Different addressing modes between ARM/Thumb2 for constant pool loads.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci))
+ .addReg(DestReg).addConstantPoolIndex(Idx));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp))
+ .addReg(DestReg).addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0));
+
+ return DestReg;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
+ VT = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (VT == MVT::Other || !VT.isSimple()) return false;
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign- or zero-extended to a basic operation,
+  // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the Reg+Offset to get to an object.
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
+ int &Offset) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ //errs() << "Failing Opcode is: " << *Op1 << "\n";
+ break;
+ case Instruction::Alloca: {
+ assert(false && "Alloca should have been handled earlier!");
+ return false;
+ }
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ //errs() << "Failing GV is: " << GV << "\n";
+ (void)GV;
+ return false;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ Reg = getRegForValue(Obj);
+ if (Reg == 0) return false;
+
+  // Since the offset may be too large for the load instruction, get the
+  // reg+offset into a register.
+ // TODO: Verify the additions work, otherwise we'll need to add the
+ // offset instead of 0 to the instructions and do all sorts of operand
+ // munging.
+ // TODO: Optimize this somewhat.
+ if (Offset != 0) {
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned PredReg = 0;
+
+ if (!isThumb)
+ emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::ARMLoadAlloca(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ ResultReg, SI->second, RC,
+ TM.getRegisterInfo());
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
+ unsigned Reg, int Offset) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Trying to emit for an unhandled type!");
+ return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
+ VT = MVT::i32;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
+ VT = MVT::i32;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tLDR : ARM::LDR;
+ break;
+ }
+
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+
+  // TODO: Fix the addressing modes so that these can share some code.
+  // Since this is a Thumb1 load, this will work in Thumb1 or Thumb2 mode.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
+ Value *Op1 = I->getOperand(1);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ assert(SrcReg != 0 && "Nothing to store!");
+ TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ SrcReg, true /*isKill*/, SI->second, RC,
+ TM.getRegisterInfo());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
+ unsigned DstReg, int Offset) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
+ case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
+ case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc), SrcReg)
+ .addReg(DstReg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc), SrcReg)
+ .addReg(DstReg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Yay type legalization
+ EVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+  // If we're storing to an alloca, we know it has a frame index and can emit
+  // the store quickly.
+ if (ARMStoreAlloca(I, SrcReg))
+ return true;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
+
+  return true;
+}
+
+bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
+  // If we're loading from an alloca, we know it has a frame index and can
+  // emit the load directly in short order.
+ if (ARMLoadAlloca(I))
+ return true;
+
+ // Verify we have a legal type before going any further.
+ EVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+ return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addReg(CondReg));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+ // No Thumb-1 for now.
+ if (isThumb && !AFI->isThumb2Function()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return ARMSelectLoad(I);
+ case Instruction::Store:
+ return ARMSelectStore(I);
+ case Instruction::Br:
+ return ARMSelectBranch(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
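
Not part of this commit: the FastEmitInst_* overrides above differ from the generic FastISel emitters only in that every instruction they build is routed through AddOptionalDefs. The fragment below is a minimal sketch of what that helper appends to a bare BuildMI call; the ADDri opcode and the DestReg/SrcReg names are illustrative placeholders, and the surrounding ARMFastISel member context (FuncInfo, DL, TII) is assumed.

    // Sketch only -- assumes the ARMFastISel member context used above.
    MachineInstrBuilder MIB =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::ADDri), DestReg)
            .addReg(SrcReg).addImm(1);
    // ADDri is predicable, so AddOptionalDefs appends the default AL predicate
    // and a zero predicate register; ADDri also carries an optional CPSR/CCR
    // def, so a default (unset) CC operand is appended as well.
    AddOptionalDefs(MIB);

This keeps the tablegen-generated fast-isel patterns in ARMGenFastISel.inc oblivious to ARM's predicate and optional-def operands.
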
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 000000000000..85b0c6c248d0
--- /dev/null
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,212 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all of
+// the globals that were merged can be addressed using offsets from the same
+// base pointer (there is no need for a separate base pointer for each of
+// them). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider code that touches several global variables at once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM, the addresses of all three arrays must be kept in registers, so
+// this code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved two registers here almost "for free".
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+ class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+    /// TLI - Keep a pointer to a TargetLowering to consult for determining
+    /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function& F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td):
+ TD(td) { }
+
+ bool operator() (const GlobalVariable* GV1,
+ const GlobalVariable* GV2) {
+ const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<const Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; MergedSize < MaxOffset && j != e; ++j) {
+ const Type* Ty = Globals[j]->getType()->getElementType();
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ MergedSize += TD->getTypeAllocSize(Ty);
+ }
+
+ StructType* MergedTy = StructType::get(M.getContext(), Tys);
+ Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "merged");
+ for (size_t k = i; k < j; ++k) {
+ SmallVector<Constant*, 2> Idx;
+ Idx.push_back(ConstantInt::get(Int32Ty, 0));
+ Idx.push_back(ConstantInt::get(Int32Ty, k-i));
+
+ Constant* GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedGV,
+ &Idx[0], Idx.size());
+
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module& M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ if (I->getAlignment() != 0)
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
+ if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+  // FIXME: This currently breaks the EH processing due to the way the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function& F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
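
Not part of this commit: the factory above only constructs the pass, so the ARM target still has to schedule it, conventionally before instruction selection so that ISel sees the merged layout. The sketch below shows one plausible way to wire it up; the function name, the OptLevel guard, and the PM/TLI parameters are assumptions of this note rather than code from the patch.

    // Hypothetical wiring sketch only -- not taken from this patch.
    bool addARMPreISelPasses(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
                             const TargetLowering *TLI) {
      if (OptLevel != CodeGenOpt::None)        // merging is purely an optimization
        PM.add(createARMGlobalMergePass(TLI));
      return false;
    }
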
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c84d3ff81324..51a30c158dd1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -36,6 +36,11 @@
using namespace llvm;
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -113,6 +118,16 @@ public:
bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ }
+
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
@@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Mode) {
Addr = N;
- Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
return true;
}
@@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
break;
}
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(NumVecs, VT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (NumVecs < 2)
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
return VLd;
- SDValue RegSeq;
- SDValue V0 = SDValue(VLd, 0);
- SDValue V1 = SDValue(VLd, 1);
-
- // Form a REG_SEQUENCE to force register allocation.
- if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- else {
- SDValue V2 = SDValue(VLd, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLd, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
+ SuperReg = SDValue(VLd, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, RegSeq);
+ dl, VT, SuperReg);
ReplaceUses(SDValue(N, Vec), D);
}
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
return NULL;
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(2 * NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 2 * NumVecs);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
+
+ SuperReg = SDValue(VLd, 0);
+ Chain = SDValue(VLd, 1);
- // Combine the even and odd subregs to produce the result.
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MemAddr.getValueType());
- ResTys.push_back(MVT::Other);
+ EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
- Chain = SDValue(VLdA, NumVecs+1);
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
- Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
- Chain = SDValue(VLdB, NumVecs+1);
-
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ Pred, Reg0, Chain };
+ SDNode *VLdB =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SuperReg = SDValue(VLdB, 0);
+ Chain = SDValue(VLdB, 2);
+ }
+
+ // Extract out the Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), Q);
}
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
@@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (is64BitVector) {
- if (NumVecs >= 2) {
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2,
- // storing pairs of D regs.
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 2) {
- // First extract the pair of Q registers.
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
-
- // Form a QQ register.
- SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
- QQ));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3)));
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ if (Pred_t2_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_so_imm(TrueVal.getNode())) {
+ if (Pred_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
}
// Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
// (imm:i32):$cc)
// Emits: (MOVCCi:i32 GPR:i32:$false,
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
@@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
-
- // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
- MVT::v2f64, MVT::Other, Ops, 5);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
- // Other cases are autogenerated.
- break;
- }
- case ISD::STORE: {
- // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
- AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
// Other cases are autogenerated.
break;
}
@@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD1d64T };
- unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
- ARM::VLD3q16_UPD,
- ARM::VLD3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
- ARM::VLD3q16odd_UPD,
- ARM::VLD3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD1d64Q };
- unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
- ARM::VLD4q16_UPD,
- ARM::VLD4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
- ARM::VLD4q16odd_UPD,
- ARM::VLD4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST1d64T };
- unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
- ARM::VST3q16_UPD,
- ARM::VST3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
- ARM::VST3q16odd_UPD,
- ARM::VST3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST1d64Q };
- unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
- ARM::VST4q16_UPD,
- ARM::VST4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
- ARM::VST4q16odd_UPD,
- ARM::VST4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0091df753eb7..ce4a2c90689c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(true));
+ cl::init(false));
+
+// This option should go away when Machine LICM is smart enough to hoist a
+// reg-to-reg VDUP.
+static cl::opt<bool>
+EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
+ cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
+ cl::init(false));
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
@@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
}
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
@@ -410,12 +427,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
- // use the default expansion.
- bool canHandleAtomics =
- (Subtarget->hasV7Ops() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
- if (canHandleAtomics) {
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -466,10 +481,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -481,9 +498,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -530,6 +547,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasV6T2Ops())
+ setTargetDAGCombine(ISD::OR);
+
setStackPointerRegisterToSaveRestore(ARM::SP);
if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -547,6 +567,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+  // Use DPR as the representative register class for all floating-point and
+  // vector types. Since there are 32 SPR registers and 32 DPR registers, the
+  // cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
@@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::AND: return "ARMISD::AND";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
@@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
}
}
@@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
@@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
}
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCState &State, bool CanFail) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
@@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
if (HiRegList[i] == Reg)
break;
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
LocVT, LocInfo));
@@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- // v6 and v7 can both handle barriers directly, but need handled a bit
- // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+  // Some subtargets that have dmb and dsb instructions can handle barriers
+  // directly. Some ARMv6 CPUs can also support them with the help of the mcr
+  // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
// never get here.
unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
- if (Subtarget->hasV7Ops())
+ if (Subtarget->hasDataBarrier())
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
- else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ else {
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
- assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return SDValue();
+ }
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
- DebugLoc dl = Node->getDebugLoc();
- EVT VT = Node->getValueType(0);
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- SDValue Align = Op.getOperand(2);
-
- // Chain the dynamic stack allocation so that it doesn't modify the stack
- // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
-
- unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
- if (AlignVal > StackAlign)
- // Do this now since selection pass cannot introduce new target
- // independent node.
- Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
-
- // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
- // using a "add r, sp, r" instead. Negate the size now so we don't have to
- // do even more horrible hack later.
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->isThumb1OnlyFunction()) {
- bool Negate = true;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
- if (C) {
- uint32_t Val = C->getZExtValue();
- if (Val <= 508 && ((Val & 3) == 0))
- Negate = false;
- }
- if (Negate)
- Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
- }
-
- SDVTList VTList = DAG.getVTList(VT, MVT::Other);
- SDValue Ops1[] = { Chain, Size, Align };
- SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
- Chain = Res.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true), SDValue());
- SDValue Ops2[] = { Res, Chain };
- return DAG.getMergeValues(Ops2, 2, dl);
-}
-
-SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalICmpImmediate(C-1)) {
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalICmpImmediate(C-1)) {
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalICmpImmediate(C+1)) {
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
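The added guards matter because the C-1 / C+1 rewrites are only valid when the adjusted constant does not wrap. A minimal plain-C++ sketch of the boundary case that the C != 0x80000000 check avoids (illustrative values only, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // For an ordinary constant, "x < C" and "x <= C-1" agree.
      const int32_t C = 5;
      for (int32_t x : {-7, 4, 5, 6})
        assert((x < C) == (x <= C - 1));

      // At the boundary, C-1 wraps around to INT32_MAX and the rewrite would
      // flip the result of the comparison, so it must be skipped.
      const int32_t Cmin = INT32_MIN;
      const int32_t wrapped = static_cast<int32_t>(static_cast<uint32_t>(Cmin) - 1u);
      const int32_t x = 0;
      assert(!(x < Cmin));   // nothing is less than INT32_MIN
      assert(x <= wrapped);  // but x <= INT32_MAX is true
      return 0;
    }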
@@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Cond.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = Cond.getOperand(4);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
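The fold implemented by LowerSELECT can be checked at the value level. A small standalone sketch in plain C++ (cmov below is a hypothetical helper standing in for the ARMISD::CMOV node, not the real lowering):

    #include <cassert>

    // Stand-in for ARMISD::CMOV: picks a or b depending on the condition.
    static int cmov(int a, int b, bool cond) { return cond ? a : b; }

    int main() {
      for (bool cond : {false, true})
        for (int t : {7, -3})
          for (int f : {11, 0}) {
            // (select (cmov 1, 0, cond), t, f)  ==  (cmov t, f, cond)
            assert(((cmov(1, 0, cond) != 0) ? t : f) == cmov(t, f, cond));
            // (select (cmov 0, 1, cond), t, f)  ==  (cmov f, t, cond)
            assert(((cmov(0, 1, cond) != 0) ? t : f) == cmov(f, t, cond));
          }
      return 0;
    }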
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
bool SeenZero = false;
if (canChangeToInt(LHS, SeenZero, Subtarget) &&
canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case.
- (FiniteOnlyFPMath() || SeenZero)) {
+      // If one of the operands is zero, it's safe to ignore the NaN case since
+      // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
     // If unsafe fp math optimization is enabled and there are no other uses of
     // the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
@@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
+  // so that the shift and the AND get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
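The add/shift/mask sequence in LowerFLT_ROUNDS_ can be exercised on its own. A minimal plain-C++ sketch showing that it realizes the 0->1, 1->2, 2->3, 3->0 mapping regardless of the other FPSCR bits (the FPSCR value here is synthesized, not read from hardware):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned expected[4] = {1, 2, 3, 0};   // FLT_ROUNDS value per ARM mode
      for (uint32_t rm = 0; rm < 4; ++rm) {
        // Place the rounding mode in bits 23:22 and add unrelated junk bits;
        // they are either shifted out or masked away.
        uint32_t fpscr = (rm << 22) | 0x0300001Fu;
        uint32_t fltRounds = ((fpscr + (1u << 22)) >> 22) & 3;
        assert(fltRounds == expected[rm]);
      }
      return 0;
    }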
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
ReverseVEXT = true;
}
+ if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
@@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
- if ((unsigned) M[i] !=
- (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
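The index formula used by isVREVMask, shown on a concrete example. A small plain-C++ sketch for a VREV64 shuffle of a 4 x i32 vector (BlockElts == 2), with a -1 entry standing for an UNDEF lane as in the change above; the mask values are illustrative:

    #include <cassert>
    #include <vector>

    int main() {
      const unsigned BlockElts = 2;            // 64-bit block / 32-bit elements
      std::vector<int> M = {1, 0, -1, 2};      // <1,0,3,2> with lane 2 undefined
      for (unsigned i = 0; i < M.size(); ++i) {
        if (M[i] < 0)
          continue;                            // UNDEF lanes match anything
        assert(static_cast<unsigned>(M[i]) ==
               (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts));
      }
      return 0;
    }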
@@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + NumElts + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
return false;
}
return true;
@@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
return false;
}
return true;
@@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != 2 * i + WhichResult)
return false;
}
@@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
- if ((unsigned) M[i + j * Half] != Idx)
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
@@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx + NumElts)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
return false;
Idx += 1;
}
@@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
return false;
Idx += 1;
}
@@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
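For the non-Thumb1 path, ARM_AM::getSOImmVal succeeds exactly when the value is an 8-bit immediate rotated right by an even amount. A standalone plain-C++ sketch of that encoding test, written for illustration only (it is not the LLVM helper itself):

    #include <cassert>
    #include <cstdint>

    // True if v can be produced by rotating an 8-bit value right by an even
    // amount, i.e. the ARM shifter-operand immediate a single MOV can encode.
    static bool isARMSOImm(uint32_t v) {
      for (unsigned rot = 0; rot < 32; rot += 2) {
        // Rotating v left by rot undoes a ROR #rot of the 8-bit payload.
        uint32_t rotl = rot ? ((v << rot) | (v >> (32 - rot))) : v;
        if (rotl <= 0xFF)
          return true;
      }
      return false;
    }

    int main() {
      assert(isARMSOImm(0x000000AB));   // plain 8-bit immediate
      assert(isARMSOImm(0xFF000000));   // 0xFF rotated right by 8
      assert(isARMSOImm(0x00AB0000));   // 0xAB rotated right by 16
      assert(!isARMSOImm(0x00012345));  // needs more than 8 significant bits
      return 0;
    }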
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (isOnlyLowElement)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
- // If all elements are constants, fall back to the default expansion, which
- // will generate a load from the constant pool.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ if (EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerBUILD_VECTOR(Val, DAG, ST));
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
if (isConstant)
return SDValue();
- // Use VDUP for non-constant splats.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (!EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats.
+ if (usesOnlyOneValue && EltSize <= 32)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ }
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
@@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
+/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
+/// an extending load, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLs;
+ } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLu;
+ } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ } else {
+ // Other vector multiplications are legal.
+ return Op;
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op1 = SkipExtension(N1, DAG);
+
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
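The lowering distinguishes sign- and zero-extended operands because a widening multiply only matches the full-width multiply when the extension kind is preserved. A tiny plain-C++ sketch, using a scalar as a stand-in for one vector lane:

    #include <cassert>
    #include <cstdint>

    int main() {
      int16_t a = -2, b = 3;

      // VMULLs models: sign-extend each 16-bit lane, then multiply to 32 bits.
      int32_t smull = static_cast<int32_t>(a) * static_cast<int32_t>(b);
      assert(smull == -6);

      // VMULLu models: zero-extend each 16-bit lane, then multiply to 32 bits.
      uint32_t umull = static_cast<uint32_t>(static_cast<uint16_t>(a)) *
                       static_cast<uint32_t>(static_cast<uint16_t>(b));
      assert(umull == 0xFFFEu * 3u);   // 196602, not the same value as above

      return 0;
    }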
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
@@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
- case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
}
return SDValue();
}
@@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
- case ARM::tANDsp:
- case ARM::tADDspr_:
- case ARM::tSUBspi_:
- case ARM::t2SUBrSPi_:
- case ARM::t2SUBrSPi12_:
- case ARM::t2SUBrSPs_: {
- MachineFunction *MF = BB->getParent();
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool DstIsDead = MI->getOperand(0).isDead();
- bool SrcIsKill = MI->getOperand(1).isKill();
-
- if (SrcReg != ARM::SP) {
- // Copy the source to SP from virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- }
-
- unsigned OpOpc = 0;
- bool NeedPred = false, NeedCC = false, NeedOp3 = false;
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("Unexpected pseudo instruction!");
- case ARM::tANDsp:
- OpOpc = ARM::tAND;
- NeedPred = true;
- break;
- case ARM::tADDspr_:
- OpOpc = ARM::tADDspr;
- break;
- case ARM::tSUBspi_:
- OpOpc = ARM::tSUBspi;
- break;
- case ARM::t2SUBrSPi_:
- OpOpc = ARM::t2SUBrSPi;
- NeedPred = true; NeedCC = true;
- break;
- case ARM::t2SUBrSPi12_:
- OpOpc = ARM::t2SUBrSPi12;
- NeedPred = true;
- break;
- case ARM::t2SUBrSPs_:
- OpOpc = ARM::t2SUBrSPs;
- NeedPred = true; NeedCC = true; NeedOp3 = true;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
- if (OpOpc == ARM::tAND)
- AddDefaultT1CC(MIB);
- MIB.addReg(ARM::SP);
- MIB.addOperand(MI->getOperand(2));
- if (NeedOp3)
- MIB.addOperand(MI->getOperand(3));
- if (NeedPred)
- AddDefaultPred(MIB);
- if (NeedCC)
- AddDefaultCC(MIB);
-
- // Copy the result from SP to virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(ARM::SP);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
- }
}
}
@@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
-/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
-static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
-
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI) {
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
- if (Result.getNode()) return Result;
- }
-
return SDValue();
}
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI);
+}
+
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
static SDValue PerformSUBCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
@@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+  //      iff (val & ~mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+  //       && CountPopulation_32(~mask) == CountPopulation_32(mask2)
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask = C->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ if ((C = dyn_cast<ConstantSDNode>(N1))) {
+ unsigned Val = C->getZExtValue();
+ if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
+ return SDValue();
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask2 = C->getZExtValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ ARM::isBitFieldInvertedMask(~Mask2) &&
+ (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned lsb = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ ARM::isBitFieldInvertedMask(Mask2) &&
+ (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+ }
+
+ return SDValue();
+}
+
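Case (1) above rests on a simple bit identity: with an inverted bitfield mask (ones outside the field, zeros inside) and a value that fits entirely in the cleared field, OR-ing the masked input with the value is the same as inserting the field. A small plain-C++ sketch (bfi below is a hypothetical model of the ARMbfi node, not the real lowering):

    #include <cassert>
    #include <cstdint>

    // Position of the field = number of trailing zeros of the cleared region.
    static unsigned trailingZeros(uint32_t x) {
      unsigned n = 0;
      while (!(x & 1)) { x >>= 1; ++n; }   // x is assumed non-zero here
      return n;
    }

    // Model of the ARMbfi node: keep `a` where invMask is 1 and insert the low
    // bits of `v` into the cleared field.
    static uint32_t bfi(uint32_t a, uint32_t v, uint32_t invMask) {
      unsigned lsb = trailingZeros(~invMask);
      return (a & invMask) | ((v << lsb) & ~invMask);
    }

    int main() {
      uint32_t A = 0xDEADBEEF;
      uint32_t invMask = 0xF000FFFF;       // ones outside, field is bits 16..27
      uint32_t Val = 0x0ABC0000;           // fits entirely in ~invMask
      assert((Val & ~invMask) == Val);

      // or (and A, mask), val  ==  bfi(A, val >> lsb, mask)
      uint32_t viaOr  = (A & invMask) | Val;
      uint32_t viaBfi = bfi(A, Val >> trailingZeros(~invMask), invMask);
      assert(viaOr == viaBfi);
      return 0;
    }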
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
// If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
// a NaN; only do the transformation when it matches that behavior.
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
@@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) {
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return 0;
+  // There can be 1's on either or both "outsides"; all the "inside"
+  // bits must be 0's.
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}
+
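The condition tested by isBitFieldInvertedMask can also be phrased as: the complement of the mask is a single, non-empty run of contiguous ones. A standalone plain-C++ sketch of that equivalent check, written for illustration (it is not the function above):

    #include <cassert>
    #include <cstdint>

    static bool isInvertedBitFieldMask(uint32_t m) {
      uint32_t field = ~m;                 // the 0-region of the mask
      if (field == 0)
        return false;                      // m == 0xffffffff: no field at all
      while (!(field & 1))
        field >>= 1;                       // strip trailing zeros
      return (field & (field + 1)) == 0;   // what remains must be 0b0...01...1
    }

    int main() {
      assert(isInvertedBitFieldMask(0xF000FFFFu));   // field = bits 16..27
      assert(isInvertedBitFieldMask(0xFFFFFFFEu));   // field = bit 0 only
      assert(!isInvertedBitFieldMask(0xFFFFFFFFu));  // no cleared bits
      assert(!isInvertedBitFieldMask(0xFF00FF00u));  // two separate cleared regions
      return 0;
    }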
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 128b72e1e743..ba9ea7f15e7b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,6 +17,8 @@
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include <vector>
@@ -45,6 +47,8 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
+ AND, // ARM "and" instruction that sets the 's' flag in CPSR.
+
CMP, // ARM compare instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
@@ -80,7 +84,7 @@ namespace llvm {
MEMBARRIER, // Memory barrier
SYNCBARRIER, // Memory sync barrier
-
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -141,6 +145,10 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -150,7 +158,10 @@ namespace llvm {
// Floating-point max and min:
FMAX,
- FMIN
+ FMIN,
+
+ // Bit-field insert
+ BFI
};
}
@@ -162,6 +173,7 @@ namespace llvm {
/// returns -1.
int getVFPf32Imm(const APFloat &FPImm);
int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
}
//===--------------------------------------------------------------------===//
@@ -171,6 +183,8 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
+ virtual unsigned getJumpTableEncoding(void) const;
+
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -255,8 +269,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
Sched::Preference getSchedulingPreference(SDNode *N) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -265,11 +290,17 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
@@ -310,14 +341,15 @@ namespace llvm {
SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -377,6 +409,10 @@ namespace llvm {
unsigned BinOpcode) const;
};
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
}
#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index ac568e75ccc4..113cfffe61f9 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>;
def LdStExFrm : Format<11>;
def ArithMiscFrm : Format<12>;
-def ExtFrm : Format<13>;
-
-def VFPUnaryFrm : Format<14>;
-def VFPBinaryFrm : Format<15>;
-def VFPConv1Frm : Format<16>;
-def VFPConv2Frm : Format<17>;
-def VFPConv3Frm : Format<18>;
-def VFPConv4Frm : Format<19>;
-def VFPConv5Frm : Format<20>;
-def VFPLdStFrm : Format<21>;
-def VFPLdStMulFrm : Format<22>;
-def VFPMiscFrm : Format<23>;
-
-def ThumbFrm : Format<24>;
-def MiscFrm : Format<25>;
-
-def NGetLnFrm : Format<26>;
-def NSetLnFrm : Format<27>;
-def NDupFrm : Format<28>;
-def NLdStFrm : Format<29>;
-def N1RegModImmFrm: Format<30>;
-def N2RegFrm : Format<31>;
-def NVCVTFrm : Format<32>;
-def NVDupLnFrm : Format<33>;
-def N2RegVShLFrm : Format<34>;
-def N2RegVShRFrm : Format<35>;
-def N3RegFrm : Format<36>;
-def N3RegVShFrm : Format<37>;
-def NVExtFrm : Format<38>;
-def NVMulSLFrm : Format<39>;
-def NVTBLFrm : Format<40>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
// Misc flags.
@@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; }
class AddrMode<bits<4> val> {
bits<4> Value = val;
}
-def AddrModeNone : AddrMode<0>;
-def AddrMode1 : AddrMode<1>;
-def AddrMode2 : AddrMode<2>;
-def AddrMode3 : AddrMode<3>;
-def AddrMode4 : AddrMode<4>;
-def AddrMode5 : AddrMode<5>;
-def AddrMode6 : AddrMode<6>;
-def AddrModeT1_1 : AddrMode<7>;
-def AddrModeT1_2 : AddrMode<8>;
-def AddrModeT1_4 : AddrMode<9>;
-def AddrModeT1_s : AddrMode<10>;
-def AddrModeT2_i12: AddrMode<11>;
-def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
// Instruction size.
@@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
// ARM special operands.
//
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
@@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
@@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsARM];
}
-// Same as I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
@@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
}
class ABXIx2<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin,
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin,
asm, "", pattern>;
// BR_JT instructions
@@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-
// Atomic load/store instructions
-
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 1;
+ let Inst{20} = 1;
let Inst{11-0} = 0b111110011111;
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 0;
+ let Inst{20} = 0;
let Inst{11-4} = 0b11111001;
}
@@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
: I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// loads
@@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -419,7 +425,7 @@ class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// stores
@@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed loads
@@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed stores
@@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed loads
@@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed stores
@@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -549,7 +555,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// addrmode3 instructions
@@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
- let Inst{12} = opcod3;
+ let Inst{12} = opcod3;
}
// BR_JT instructions
@@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 {
// A6.2.4 Load/store single data item encoding.
class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
let Inst{15-12} = opA;
- let Inst{11-9} = opB;
+ let Inst{11-9} = opB;
}
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
+class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
-class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
// A6.2.5 Miscellaneous 16-bit instructions encoding.
class T1Misc<bits<7> opcode> : Encoding16 {
@@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
-// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
- let Inst{24} = P;
- let Inst{23} = ?; // The U bit.
- let Inst{22} = 1;
- let Inst{21} = W;
- let Inst{20} = load;
+ let Inst{24} = P;
+ let Inst{23} = ?; // The U bit.
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = load;
}
class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
list<Predicate> Predicates = [IsThumb2];
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
- let Inst{24} = signed;
- let Inst{23} = 0;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
let Inst{22-21} = opcod;
- let Inst{20} = load;
- let Inst{11} = 1;
+ let Inst{20} = load;
+ let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
- let Inst{10} = pre; // The P bit.
- let Inst{8} = 1; // The W bit.
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
}
// Helper class for disassembly only
@@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b011;
- let Inst{23} = long;
+ let Inst{23} = long;
let Inst{22-20} = op22_20;
- let Inst{7-4} = op7_4;
+ let Inst{7-4} = op7_4;
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
}
// Load / store multiple
-class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let D = VFPNeonDomain;
}
-class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Double precision, binary, VML[AS] (for additional predicate)
@@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
list<Predicate> Predicates = [HasVFP2, UseVMLx];
}
-
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1010;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Single precision binary, if no NEON
@@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
: NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
cstr, pattern> {
let Inst{31-24} = 0b11110100;
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{7-4} = op7_4;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
}
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
string opc, string dt, string asm, string cstr,
list<dag> pattern>
: NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-19} = op21_19;
- let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{5} = op5;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
}
// NEON 2 vector register format.
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N2V except it doesn't have a datatype suffix.
@@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 2 vector register with immediate.
@@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 3 vector register format.
@@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N3V except it doesn't have a data type suffix.
@@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON VMOVs between scalar and core registers.
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
: InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
"", itin> {
let Inst{27-20} = opcod1;
- let Inst{11-8} = opcod2;
- let Inst{6-5} = opcod3;
- let Inst{4} = 1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
let Inst{24-23} = 0b11;
let Inst{21-20} = 0b11;
let Inst{19-16} = op19_16;
- let Inst{11-7} = 0b11000;
- let Inst{6} = op6;
- let Inst{4} = 0;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 51fc1522485f..e66f9b9ad0ac 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
SDTCisVT<3, i32>, SDTCisVT<4, i32>,
SDTCisVT<5, OtherVT>]>;
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
+def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd,
+ [SDNPOutFlag]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
- [SDNPOutFlag,SDNPCommutative]>;
+ [SDNPOutFlag, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
@@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
-def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
+def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
-def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
- uint32_t v = (uint32_t)N->getZExtValue();
- if (v == 0xffffffff)
- return 0;
- // there can be 1's on either or both "outsides", all the "inside"
- // bits must be 0's
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let PrintMethod = "printBitfieldInvMaskImmOperand";
}
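
The inline predicate deleted above documents exactly what the new ARM::isBitFieldInvertedMask helper is expected to test: the value must not be all ones, and apart from 1s on either outside edge it must contain a single contiguous run of 0s. A minimal stand-alone sketch, written from that removed code rather than from the helper's actual implementation:

  #include <cassert>
  #include <cstdint>

  // True if V is the inverse of a contiguous bitfield mask, e.g. 0xf000ffff:
  // 1s only on the outside, one contiguous run of 0s inside. This mirrors the
  // removed inline predicate; the real ARM::isBitFieldInvertedMask may differ.
  static bool isBitFieldInvertedMaskSketch(uint32_t V) {
    if (V == 0xffffffff)
      return false;                  // all-ones is not a valid inverted mask
    unsigned LSB = 0, MSB = 31;
    while (V & (1u << MSB)) --MSB;   // skip 1s on the high side
    while (V & (1u << LSB)) ++LSB;   // skip 1s on the low side
    for (unsigned I = LSB; I <= MSB; ++I)
      if (V & (1u << I))
        return false;                // a stray 1 inside the zero run
    return true;
  }

  int main() {
    assert(isBitFieldInvertedMaskSketch(0xf000ffffu));    // the example above
    assert(!isBitFieldInvertedMaskSketch(0xff00ff00u));   // two zero runs
    return 0;
  }
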
/// Split a 32-bit immediate into two 16 bit parts.
-def lo16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
- MVT::i32);
-}]>;
-
def hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
}]>;
@@ -306,6 +302,13 @@ def pclabel : Operand<i32> {
let PrintMethod = "printPCLabel";
}
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+}
+
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
@@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm
// represented in the imm field in the same 12-bit form that they are encoded
// into so_imm instructions: the 8-bit immediate is the least significant bits
// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
-def so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
- }]> {
+def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
let PrintMethod = "printSOImmOperand";
}
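
For reference, a tiny stand-alone decoder of that 12-bit form, assuming the 4-bit field follows the A32 rule of rotating the 8-bit immediate right by twice its value:

  #include <cassert>
  #include <cstdint>

  // Decode the 12-bit so_imm form: bits 0-7 hold the 8-bit immediate and
  // bits 8-11 hold the rotation field, interpreted (as in the A32 instruction
  // encoding) as a rotate-right by twice the field value.
  static uint32_t decodeSOImm(unsigned Enc) {
    uint32_t Imm8 = Enc & 0xFF;
    unsigned Rot = ((Enc >> 8) & 0xF) * 2;          // actual rotation, 0..30
    return Rot ? (Imm8 >> Rot) | (Imm8 << (32 - Rot)) : Imm8;
  }

  int main() {
    assert(decodeSOImm(0x0FF) == 0xFFu);            // no rotation
    assert(decodeSOImm(0x4FF) == 0xFF000000u);      // 0xFF rotated right by 8
    return 0;
  }
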
@@ -452,11 +452,15 @@ include "ARMInstrFormats.td"
/// binop that produces a value.
multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{25} = 1;
}
+ }
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
@@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
@@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in {
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target \n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
let Inst{11-4} = 0b00000000;
let Inst{15-12} = 0b1111;
@@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTm : JTI<(outs),
(ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "ldr\tpc, $target \n$jt",
+ IIC_Br, "ldr\tpc, $target$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTadd : JTI<(outs),
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "add\tpc, $target, $idx \n$jt",
+ IIC_Br, "add\tpc, $target, $idx$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111,
defm UXTB16 : AI_unary_rrot<0b01101100,
"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (UXTB16r_rot GPR:$Src, 24)>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16r_rot GPR:$Src, 8)>;
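
A quick worked check of the FIXME above, using an arbitrary input value, shows why the shl form had to be disabled: the shift feeds zeros into the low byte, while uxtb16 with a rotation of 24 wraps the top byte of the source into the low byte of the result.

  #include <cassert>
  #include <cstdint>

  static uint32_t ror32(uint32_t V, unsigned Amt) {
    return (V >> Amt) | (V << (32 - Amt));          // Amt must be 1..31 here
  }

  int main() {
    uint32_t Src = 0xAABBCCDD;
    // What the disabled pattern matched: (and (shl Src, 8), 0xFF00FF).
    uint32_t ShlForm = (Src << 8) & 0x00FF00FF;              // 0x00CC0000
    // What "uxtb16 Rd, Rm, ror #24" computes: rotate, keep bytes 0 and 2.
    uint32_t Uxtb16Ror24 = ror32(Src, 24) & 0x00FF00FF;      // 0x00CC00AA
    assert(ShlForm != Uxtb16Ror24);   // low bytes differ: 0x00 vs 0xAA
    return 0;
  }
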
@@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
+ IIC_iALUi, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ IIC_iALUr, "rsb", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
+
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
+ IIC_iALUsr, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
let Inst{25} = 0;
}
@@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
Requires<[IsARM]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
@@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
// ARM Arithmetic Instruction -- for disassembly only
// GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc>
+class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */]>
: AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
- opc, "\t$dst, $a, $b",
- [/* For disassembly only; pattern left blank */]> {
+ opc, "\t$dst, $a, $b", pattern> {
let Inst{27-20} = op27_20;
let Inst{7-4} = op7_4;
}
// Saturating add/subtract (QADD and QSUB now have selection patterns via the
// arm_qadd/arm_qsub intrinsics; the rest remain for disassembly only)
-def QADD : AAI<0b00010000, 0b0101, "qadd">;
+def QADD : AAI<0b00010000, 0b0101, "qadd",
+ [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
def QASX : AAI<0b01100010, 0b0011, "qasx">;
def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
def QSAX : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB : AAI<0b00010010, 0b0101, "qsub">;
+def QSUB : AAI<0b00010010, 0b0101, "qsub",
+ [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
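
The patterns added to QADD and QSUB above select the int_arm_qadd / int_arm_qsub intrinsics. As a reminder of what those compute, here is a hedged scalar sketch of signed saturating 32-bit addition (the subtraction case is analogous):

  #include <cassert>
  #include <cstdint>

  // Signed saturating add: clamp to [INT32_MIN, INT32_MAX] instead of wrapping.
  static int32_t qadd32(int32_t A, int32_t B) {
    int64_t Sum = (int64_t)A + (int64_t)B;
    if (Sum > INT32_MAX) return INT32_MAX;
    if (Sum < INT32_MIN) return INT32_MIN;
    return (int32_t)Sum;
  }

  int main() {
    assert(qadd32(INT32_MAX, 1) == INT32_MAX);   // saturates, does not wrap
    assert(qadd32(INT32_MIN, -1) == INT32_MIN);
    return 0;
  }
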
@@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
// Signed/Unsigned saturate -- for disassembly only
-def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b001;
-}
-
-def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101010;
let Inst{7-4} = 0b0011;
}
-def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b001;
-}
-
-def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101110;
let Inst{7-4} = 0b0011;
}
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
defm AND : AsI1_bin_irs<0b0000, "and",
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ANDS : AI1_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
@@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-// Added for disassembler with the pattern field purposely left blank.
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $src, $imm", "",
- [/* For disassembly only; pattern left blank */]>,
+ "bfi", "\t$dst, $val, $imm", "$src = $dst",
+ [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
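
For context on why the operand is an *inverted* field mask, here is a sketch of the A8.6.18 hardware behaviour of BFI Rd, Rn, #lsb, #width, shown only to illustrate the instruction; the exact operand convention of the ARMbfi selection node is not modelled here.

  #include <cassert>
  #include <cstdint>

  // BFI Rd, Rn, #lsb, #width: copy the low 'width' bits of Rn into
  // Rd[lsb+width-1:lsb] and leave the other bits of Rd unchanged. The
  // inverse of FieldMask (e.g. 0xf000ffff for lsb=16, width=12) is the
  // kind of immediate bf_inv_mask_imm accepts.
  static uint32_t bfi(uint32_t Rd, uint32_t Rn, unsigned Lsb, unsigned Width) {
    uint32_t FieldMask =
        (Width == 32 ? 0xFFFFFFFFu : ((1u << Width) - 1)) << Lsb;
    return (Rd & ~FieldMask) | ((Rn << Lsb) & FieldMask);
  }

  int main() {
    // Insert 0xABC into bits 16-27 of 0x11112222.
    assert(bfi(0x11112222, 0xABC, 16, 12) == 0x1ABC2222);
    return 0;
  }
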
@@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
let Inst{19-16} = 0b1111;
}
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+ (and (shl GPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b001;
@@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
(PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
+ (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+def asr_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
+ (and (sra GPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b101;
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here; use PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
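
A short hedged check of the note above PKHTB (assuming the usual arithmetic behaviour of >> on signed values): for shift amounts below 16, the low half-word of an arithmetic and a logical right shift is identical, which is why the srl-based alternate patterns are safe.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Src = 0x80014002;    // negative when viewed as a signed value
    for (unsigned Sh = 1; Sh <= 15; ++Sh) {
      uint32_t Lsr = (Src >> Sh) & 0xFFFF;
      uint32_t Asr = (uint32_t)((int32_t)Src >> Sh) & 0xFFFF;
      assert(Lsr == Asr);         // sign bits never reach the kept half-word
    }
    return 0;
  }
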
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
defm CMP : AI1_cmp_irs<0b1010, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
+
+// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
+// and comparison with 0. These two pieces of code should give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short,
+// CMP computes AddWithCarry(r0, NOT(0), 1): since NOT(0) is 0xFFFFFFFF, the
+// 33-bit sum always overflows the truncated 32-bit result, so the carry flag
+// is always set. CMN computes AddWithCarry(r0, 0, 0): no NOT is performed and
+// the carry-in is 0, so that AddWithCarry never produces a carry.
+//
+// The AddWithCarry in the CMP case seems to be relying upon the identity:
+//
+// ~x + 1 = -x
+//
+// However when x is 0 and unsigned, this doesn't hold:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable *all* versions of CMN, especially when comparing
+// against zero, until we can limit when the CMN instruction is used (when we
+// know that the RHS is not 0) or when we have a hardware fix for this.
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
//defm CMN : AI1_cmp_irs<0b1011, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
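
A stand-alone model of the AddWithCarry pseudo-code cited above (written from that description, not taken from any LLVM or ARM header) makes the carry difference concrete:

  #include <cassert>
  #include <cstdint>

  // AddWithCarry(x, y, carry_in): returns the 32-bit result and reports
  // whether the unsigned 33-bit sum overflowed (the carry flag).
  static uint32_t addWithCarry(uint32_t X, uint32_t Y, unsigned CarryIn,
                               bool &CarryOut) {
    uint64_t Sum = (uint64_t)X + Y + CarryIn;
    CarryOut = (Sum >> 32) != 0;
    return (uint32_t)Sum;
  }

  int main() {
    uint32_t R0 = 5, R1 = 0;
    bool CmpCarry, CmnCarry;
    addWithCarry(R0, ~R1, 1, CmpCarry);   // cmp r0, r1  ->  r0 + ~0 + 1
    addWithCarry(R0, R1, 0, CmnCarry);    // cmn r0, r1  ->  r0 +  0 + 0
    assert(CmpCarry && !CmnCarry);        // carry differs when r1 == 0
    return 0;
  }
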
@@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
def BCCi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
- IIC_Br,
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
"${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
@@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def Int_MemBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
// See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_SyncBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
// See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
- [(ARMMemBarrierV6 GPR:$zero)]>,
+ [(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
-def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 4",
- [(ARMSyncBarrierV6 GPR:$zero)]>,
+ [(ARMSyncBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DSB
// FIXME: add encoding
}
}
-// Helper class for multiclass MemB -- for disassembly only
-class AMBI<string opc, string asm>
- : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf57;
-}
-
-multiclass MemB<bits<4> op7_4, string opc> {
-
- def st : AMBI<opc, "\tst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1110;
- }
-
- def ish : AMBI<opc, "\tish"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1011;
- }
-
- def ishst : AMBI<opc, "\tishst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1010;
- }
-
- def nsh : AMBI<opc, "\tnsh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0111;
- }
-
- def nshst : AMBI<opc, "\tnshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0110;
- }
+// Memory barrier operation variants -- for disassembly only
- def osh : AMBI<opc, "\tosh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0011;
- }
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+}
- def oshst : AMBI<opc, "\toshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0010;
- }
+class AMBI<bits<4> op7_4, string opc>
+ : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-8} = 0xf57ff0;
+ let Inst{7-4} = op7_4;
}
// These DMB variants are for disassembly only.
-defm DMB : MemB<0b0101, "dmb">;
+def DMBvar : AMBI<0b0101, "dmb">;
// These DSB variants are for disassembly only.
-defm DSB : MemB<0b0100, "dsb">;
+def DSBvar : AMBI<0b0100, "dsb">;
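
The per-option Inst{3-0} encodings that the deleted MemB multiclass spelled out, plus the full-system value used by DMBsy/DSBsy, now travel in the memb_opt operand. Gathered in one place for reference (the enum and its names are illustrative, not LLVM identifiers; printMemBOption is assumed to print the matching textual suffix):

  // Barrier option field (Inst{3-0}) values, as in the removed MemB variants.
  enum MemBarrierOption : unsigned {
    MB_SY    = 0xF,  // full system (DMBsy / DSBsy)
    MB_ST    = 0xE,  // "st"
    MB_ISH   = 0xB,  // "ish"
    MB_ISHST = 0xA,  // "ishst"
    MB_NSH   = 0x7,  // "nsh"
    MB_NSHST = 0x6,  // "nshst"
    MB_OSH   = 0x3,  // "osh"
    MB_OSHST = 0x2   // "oshst"
  };
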
// ISB has only the full-system option -- for disassembly only
-def ISBsy : AMBI<"isb", ""> {
- let Inst{7-4} = 0b0110;
+def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = 0b1111;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7f7eb980abe8..4d2f1169061f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
@@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
@@ -124,15 +129,16 @@ def nModImm : Operand<i32> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
def VLDMQ
- : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
+ : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
+ "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
@@ -141,15 +147,16 @@ def VLD1q
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
} // mayLoad = 1, neverHasSideEffects = 1
-let mayStore = 1, neverHasSideEffects = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
def VSTMQ
- : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
+ "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
// This is equivalent to VST1q64 except that it has a Q register operand.
@@ -160,6 +167,25 @@ def VST1q
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQWBPseudo
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQPseudo
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQQWBPseudo
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQQQWBPseudo
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb, $src = $dst">;
+
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
@@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">;
def VLD1q32 : VLD1Q<0b1000, "32">;
def VLD1q64 : VLD1Q<0b1100, "64">;
+def VLD1q8Pseudo : VLDQPseudo;
+def VLD1q16Pseudo : VLDQPseudo;
+def VLD1q32Pseudo : VLDQPseudo;
+def VLD1q64Pseudo : VLDQPseudo;
+
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
@@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+def VLD1q8Pseudo_UPD : VLDQWBPseudo;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d64TPseudo : VLDQQPseudo;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
@@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d64QPseudo : VLDQQPseudo;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
+
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
@@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2d8Pseudo : VLDQPseudo;
+def VLD2d16Pseudo : VLDQPseudo;
+def VLD2d32Pseudo : VLDQPseudo;
+
+def VLD2q8Pseudo : VLDQQPseudo;
+def VLD2q16Pseudo : VLDQQPseudo;
+def VLD2q32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
@@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2d8Pseudo_UPD : VLDQWBPseudo;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
@@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo : VLDQQPseudo;
+def VLD3d16Pseudo : VLDQQPseudo;
+def VLD3d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
@@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo : VLDQQPseudo;
+def VLD4d16Pseudo : VLDQQPseudo;
+def VLD4d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
@@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
+class VSTQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
+class VSTQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
@@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8Pseudo : VSTQPseudo;
+def VST1q16Pseudo : VSTQPseudo;
+def VST1q32Pseudo : VSTQPseudo;
+def VST1q64Pseudo : VSTQPseudo;
+
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8Pseudo_UPD : VSTQWBPseudo;
+def VST1q16Pseudo_UPD : VSTQWBPseudo;
+def VST1q32Pseudo_UPD : VSTQWBPseudo;
+def VST1q64Pseudo_UPD : VSTQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d64TPseudo : VSTQQPseudo;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
@@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d64QPseudo : VSTQQPseudo;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo;
+
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
@@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
+def VST2d8Pseudo : VSTQPseudo;
+def VST2d16Pseudo : VSTQPseudo;
+def VST2d32Pseudo : VSTQPseudo;
+
+def VST2q8Pseudo : VSTQQPseudo;
+def VST2q16Pseudo : VSTQQPseudo;
+def VST2q32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2d8Pseudo_UPD : VSTQWBPseudo;
+def VST2d16Pseudo_UPD : VSTQWBPseudo;
+def VST2d32Pseudo_UPD : VSTQWBPseudo;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
@@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
+def VST3d8Pseudo : VSTQQPseudo;
+def VST3d16Pseudo : VSTQQPseudo;
+def VST3d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
def VST3q16 : VST3D<0b0101, 0b0100, "16">;
@@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
+def VST4d8Pseudo : VSTQQPseudo;
+def VST4d16Pseudo : VSTQQPseudo;
+def VST4d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
@@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
+ [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
-// Long 2-register intrinsics (currently only used for VMOVL).
-class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
(ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
@@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(ResTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))))]>;
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set DPR:$dst, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_8:$src3),
+ imm:$lane))))))]>;
+
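
As a lane-wise sketch of what the N3VLMulOp pattern expresses, using vmlal.s8 as the example (values are arbitrary): each double-width accumulator lane receives add(acc, widening signed multiply of the two narrow lanes).

  #include <cassert>
  #include <cstdint>

  int main() {
    int8_t A[8]  = {-128, 127, 3, -4, 5, 6, 7, 8};
    int8_t B[8]  = {127, 127, 2, 2, 2, 2, 2, 2};
    int16_t Q[8] = {1000, -1000, 0, 0, 0, 0, 0, 0};
    for (int I = 0; I < 8; ++I)   // widen first, then multiply and accumulate
      Q[I] = (int16_t)(Q[I] + (int16_t)A[I] * (int16_t)B[I]);
    assert(Q[0] == (int16_t)(1000 + (-128 * 127)));   // -15256
    assert(Q[1] == (int16_t)(-1000 + 127 * 127));     //  15129
    return 0;
  }
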
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))))]>;
+
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
let isCommutable = Commutable;
}
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
+ (TyD DPR:$src2))))))]> {
+ let isCommutable = Commutable;
+}
+
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
imm:$lane)))))]>;
-// Wide 3-register intrinsics.
-class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
let isCommutable = Commutable;
}
@@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
- def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
- def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
@@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v8i16, v8i8, IntOp, Commutable>;
}
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
// Neon Wide 3-register vector operations,
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i64, v2i32, IntOp, Commutable>;
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
@@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
mul, ShOp>;
}
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
@@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", int_arm_neon_vmullu, 1>;
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
- int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
- int_arm_neon_vmullu>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
@@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", int_arm_neon_vabds, 0>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", int_arm_neon_vabdu, 0>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
- "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
- "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", int_arm_neon_vabau>;
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
@@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
- "vmovn", "i", int_arm_neon_vmovn>;
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+ "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
"vqmovn", "s", int_arm_neon_vqmovns>;
@@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
"vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
- int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
- int_arm_neon_vmovlu>;
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// Vector Conversions.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bc0790dccbb5..a13ff1232749 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
// ADD rd, sp, #imm8
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in {
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
"add\t$dst, $sp, $rhs", []>,
T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+}
// ADD sp, sp, #imm7
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
@@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
let Inst{2-0} = 0b101;
}
-// Pseudo instruction that will expand into a tSUBspi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
-
-def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "${:comment} add\t$dst, $rhs", []>;
-
-let Defs = [CPSR] in
-def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "${:comment} and\t$dst, $rhs", []>;
-} // usesCustomInserter
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
@@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in {
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt",
+ IIC_Br, "mov\tpc, $target\n\t.align\t2$jt",
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
Encoding16 {
let Inst{15-7} = 0b010001101;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bbe675e81ab1..6ba0a44be470 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm
ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
[shl,srl,sra,rotr]> {
let PrintMethod = "printT2SOOperand";
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops rGPR, i32imm);
}
// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
@@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// represented in the imm field in the same 12-bit form that they are encoded
// into t2_so_imm instructions: the 8-bit immediate is the least significant
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
-}]>;
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
- let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
}
@@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>,
multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Cheap = 0, bit ReMat = 0> {
// shifted imm
- def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+ def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_imm:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let Inst{31-27} = 0b11110;
@@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide =""> {
+ bit Commutable = 0, string wide = ""> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_irs counterpart.
-multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
+ // register
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
+ opc, "\t$dst, $rhs, $lhs",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
opc, "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -289,9 +298,9 @@ let Defs = [CPSR] in {
multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
+ }
// 12-bit imm
- def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
+ def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
!strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24} = 0;
@@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -382,9 +395,9 @@ let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -423,9 +436,9 @@ let Defs = [CPSR] in {
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
+/// version is not needed since this is only for codegen.
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
!strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
// 5-bit imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result, only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
@@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{11-8} = 0b1111; // Rd
}
// register
- def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, GPR:$rhs)]> {
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -650,9 +664,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+ [(set rGPR:$dst, (opnode rGPR:$src))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS,
+ (rotr rGPR:$RHS, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// DO variant - disassembly only, no pattern
multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// assembler.
let neverHasSideEffects = 1 in {
let isReMaterializable = 1 in
-def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #$label", []> {
+def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
+ "adr${p}.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{15} = 0;
}
} // neverHasSideEffects
-def t2LEApcrelJT : T2XI<(outs GPR:$dst),
+def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #${label}_${id}", []> {
+ "adr${p}.w\t$dst, #${label}_${id}", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
}
// Signed and unsigned division on v7-M
-def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"sdiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"udiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
@@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
-def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
-def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
-} // usesCustomInserter
-
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
@@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
-def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins i32imm:$addr), IIC_iLoadi,
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
@@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
(t2LDRHpci tconstpool:$addr)>;
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
// Indexed loads
let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
@@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
-def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
"mov", ".w\t$dst, $src",
- [(set GPR:$dst, t2_so_imm:$src)]> {
+ [(set rGPR:$dst, t2_so_imm:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, $src",
- [(set GPR:$dst, imm0_65535:$src)]> {
+ [(set rGPR:$dst, imm0_65535:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
}
let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
+def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi,
"movt", "\t$dst, $imm",
- [(set GPR:$dst,
- (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ [(set rGPR:$dst,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>;
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
//===----------------------------------------------------------------------===//
// Extend Instructions.
@@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
-def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
-defm t2RSB : T2I_rbin_is <0b1110, "rsb",
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
@@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
let AddedComplexity = 1 in
-def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm),
- (t2SUBSri GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm),
- (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(adde GPR:$src, imm0_255_not:$imm),
- (t2SBCSri GPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm),
- (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
// Select Bytes -- for disassembly only
@@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
-class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
- : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc,
- "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> {
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */]>
+ : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc,
+ "\t$dst, $a, $b", pat> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
// Saturating add/subtract -- for disassembly only
-def t2QADD : T2I_pam<0b000, 0b1000, "qadd">;
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>;
def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
-def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>;
def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
@@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b),
NoItinerary, "usad8", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8",
+def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8",
"\t$dst, $a, $b, $acc", []>;
// Signed/Unsigned saturate -- for disassembly only
-def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
@@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"rrx", "\t$dst, $src",
- [(set GPR:$dst, (ARMrrx GPR:$src))]> {
+ [(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
}
let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"lsrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"asrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2ANDS : T2I_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
+
let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm),
IIC_iUNAsi, "bfc", "\t$dst, $imm",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> {
+ [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{15} = 0;
}
-def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
let Inst{15} = 0;
}
-def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
// A8.6.18 BFI - Bitfield insert (Encoding T1)
-// Added for disassembler with the pattern field purposely left blank.
-// FIXME: Utilize this instruction in codgen.
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs rGPR:$dst),
+ (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
+ bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>>;
+ (not node:$RHS))>, 0, "">;
// Preferred over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
-def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
- (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
-def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
- (t2ORNri GPR:$src, t2_so_imm_not:$imm)>,
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
Requires<[IsThumb2]>;
def : T2Pat<(t2_so_imm_not:$src),
@@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"smull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
let Inst{7-4} = 0b0000;
}
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"umull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"smlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umaal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
// Rounding variants of the below included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmulr", "\t$dst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlar", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlsr", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
}
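
The mulhs node in these patterns is the top word of the full 64-bit signed product. A minimal C++ sketch of the semantics the SMMUL/SMMLA/SMMLS patterns describe (the _ref names are illustrative, not part of the source):

    #include <cstdint>

    // SMMUL: most significant word of the 64-bit signed product (mulhs).
    int32_t smmul_ref(int32_t a, int32_t b) {
      return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
    }

    // SMMLA and SMMLS add or subtract that high word against an accumulator.
    int32_t smmla_ref(int32_t a, int32_t b, int32_t c) { return c + smmul_ref(a, b); }
    int32_t smmls_ref(int32_t a, int32_t b, int32_t c) { return c - smmul_ref(a, b); }
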
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc,
- (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc,
+ (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
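
Each T2I_smul/T2I_smla variant only selects which 16-bit half of each operand feeds the multiply. A small C++ sketch of the BB and BT forms matched above (function names are illustrative):

    #include <cstdint>

    // SMULBB: bottom halfword of a times bottom halfword of b, sign-extended.
    int32_t smulbb_ref(int32_t a, int32_t b) {
      return (int32_t)(int16_t)a * (int32_t)(int16_t)b;
    }

    // SMULBT: bottom halfword of a times top halfword of b,
    // i.e. the (sext_inreg a, i16) x (sra b, 16) pattern.
    int32_t smulbt_ref(int32_t a, int32_t b) {
      return (int32_t)(int16_t)a * (int32_t)(int16_t)(b >> 16);
    }

    // SMLABB: the accumulating form adds $acc to the product.
    int32_t smlabb_ref(int32_t a, int32_t b, int32_t acc) {
      return acc + smulbb_ref(a, b);
    }
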
// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad",
+def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx",
+def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd",
+def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx",
+def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald",
+def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx",
+def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld",
+def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx",
+def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx",
"\t$ldst, $hdst, $a, $b", []>;
//===----------------------------------------------------------------------===//
@@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
let Inst{5-4} = op2;
}
-def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>;
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>;
-def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rbit", "\t$dst, $src",
- [(set GPR:$dst, (ARMrbit GPR:$src))]>;
+ [(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
-def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>;
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
-def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rev16", ".w\t$dst, $src",
- [(set GPR:$dst,
- (or (and (srl GPR:$src, (i32 8)), 0xFF),
- (or (and (shl GPR:$src, (i32 8)), 0xFF00),
- (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
- (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
+ [(set rGPR:$dst,
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF),
+ (or (and (shl rGPR:$src, (i32 8)), 0xFF00),
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
+ (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
-def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"revsh", ".w\t$dst, $src",
- [(set GPR:$dst,
+ [(set rGPR:$dst,
(sext_inreg
- (or (srl (and GPR:$src, 0xFF00), (i32 8)),
- (shl GPR:$src, (i32 8))), i16))]>;
+ (or (srl (and rGPR:$src, 0xFF00), (i32 8)),
+ (shl rGPR:$src, (i32 8))), i16))]>;
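
The rev16 and revsh patterns above are byte swaps within halfwords; a C++ sketch of the equivalent operations (names are illustrative):

    #include <cstdint>

    // REV16: byte-reverse each 16-bit halfword independently.
    uint32_t rev16_ref(uint32_t x) {
      return ((x & 0xFF00FF00u) >> 8) | ((x & 0x00FF00FFu) << 8);
    }

    // REVSH: byte-reverse the low halfword, then sign-extend to 32 bits.
    int32_t revsh_ref(uint32_t x) {
      uint16_t lo = (uint16_t)(((x & 0xFFu) << 8) | ((x >> 8) & 0xFFu));
      return (int32_t)(int16_t)lo;
    }
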
-def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
+ (and (shl rGPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
@@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
}
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>,
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
+ (and (sra rGPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
Requires<[HasT2ExtractPack]>;
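
Taken together, the PKHBT/PKHTB definitions and the alternate patterns above implement a halfword pack; a C++ sketch of the packing semantics (names are illustrative):

    #include <cstdint>

    // PKHBT: bottom halfword from src1, top halfword from src2 shifted left.
    uint32_t pkhbt_ref(uint32_t src1, uint32_t src2, unsigned lsl_amt) {
      return (src1 & 0x0000FFFFu) | ((src2 << lsl_amt) & 0xFFFF0000u);
    }

    // PKHTB: top halfword from src1, bottom halfword from src2 shifted right
    // arithmetically, matching the sra-based pattern.
    uint32_t pkhtb_ref(uint32_t src1, uint32_t src2, unsigned asr_amt) {
      return (src1 & 0xFFFF0000u) |
             ((uint32_t)((int32_t)src2 >> asr_amt) & 0x0000FFFFu);
    }
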
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
@@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst",
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
-// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero.
-// Short range conditional branch. Looks awesome for loops. Need to figure
-// out how to use this one.
-
-
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
+def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
let Inst{7-4} = 0b0000;
}
-def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true),
+def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
IIC_iCMOVi, "mov", ".w\t$dst, $true",
-[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod; // Shift type.
}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
} // neverHasSideEffects
@@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F5;
// FIXME: add support for options other than a full system DMB
let Inst{3-0} = 0b1111;
}
-def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F4;
// FIXME: add support for options other than a full system DSB
let Inst{3-0} = 0b1111;
@@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]",
"", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]",
"", []>;
-def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary,
"ldrex", "\t$dest, [$ptr]", "",
[]> {
@@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
let Inst{11-8} = 0b1111;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"ldrexd", "\t$dest, $dest2, [$ptr]", "",
[], {?, ?, ?, ?}>;
}
let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexb", "\t$success, $src, [$ptr]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexh", "\t$success, $src, [$ptr]", "", []>;
-def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strex", "\t$success, $src, [$ptr]", "",
[]> {
@@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
let Inst{26-20} = 0b0000100;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2STREXD : T2I_strex<0b11, (outs GPR:$success),
- (ins GPR:$src, GPR:$src2, GPR:$ptr),
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$success),
+ (ins rGPR:$src, rGPR:$src2, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexd", "\t$success, $src, $src2, [$ptr]", "", [],
{?, ?, ?, ?}>;
@@ -2416,7 +2414,7 @@ let Defs =
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2425,14 +2423,14 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2441,7 +2439,7 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT :
T2JTI<(outs),
(ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0100100;
@@ -2496,7 +2494,7 @@ def t2BR_JT :
def t2TBB :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbb\t$index\n$jt", []> {
+ IIC_Br, "tbb\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2507,7 +2505,7 @@ def t2TBB :
def t2TBH :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbh\t$index\n$jt", []> {
+ IIC_Br, "tbh\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
}
// Return From Exception is a system instruction -- for disassembly only
-def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000011; // W = 1
}
-def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base",
+def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000001; // W = 0
}
-def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011011; // W = 1
}
-def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011001; // W = 0
@@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
//
// Two piece so_imms.
-def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS),
- (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
- (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
+ (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
(t2_so_neg_imm2part_2 imm:$RHS))>;
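
These patterns split a constant that is not a valid Thumb2 modified immediate into two constants that are, at the cost of a second instruction. For example, 0xFF0000FF is not encodable on its own, but 0xFF000000 and 0x000000FF both are, so an OR with it becomes two ORRs. A C++ sketch of one possible split (the helper name is illustrative):

    #include <cstdint>

    uint32_t orr_two_part_example(uint32_t x) {
      x |= 0xFF000000u;   // first encodable part
      x |= 0x000000FFu;   // second encodable part; overall effect is x | 0xFF0000FF
      return x;
    }
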
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. Remove
// when we can do generalized remat.
let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set GPR:$dst, (i32 imm:$src))]>;
+ [(set rGPR:$dst, (i32 imm:$src))]>;
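
The ${src:lo16} and ${src:hi16} modifiers simply take the low and high halves of the 32-bit immediate; a C++ sketch of the expansion (names are illustrative):

    #include <cstdint>
    #include <cstdio>

    // t2MOVi32imm expands to movw (writes bits 15:0) followed by movt (writes bits 31:16).
    void print_movw_movt(const char *reg, uint32_t imm) {
      std::printf("movw %s, #%u\n", reg, (unsigned)(imm & 0xFFFFu));          // ${src:lo16}
      std::printf("movt %s, #%u\n", reg, (unsigned)((imm >> 16) & 0xFFFFu));  // ${src:hi16}
    }
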
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
//
// Rd = Instr{11-8}
-def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
+def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
}
// Rd = Instr{11-8}
-def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
+def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
}
// Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tcpsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
}
// Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tspsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 84c23e1a784c..c29e09606bd4 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
-def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
-def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
@@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
// For disassembly only.
-
+let Uses = [FPSCR] in {
def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
+}
// Convert between floating-point and fixed-point
// Data type for fixed-point naming convention:
@@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
+let isCodeGenOnly = 1 in {
def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
@@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
// Fixed-Point to FP:
+let isCodeGenOnly = 1 in {
def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
@@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
} // End of 'let Constraints = "$a = $dst" in'
@@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
}
// FPSCR <-> GPR (for disassembly only)
-
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
- "\t$dst, fpscr",
- [/* For disassembly only; pattern left blank */]> {
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
+ "vmrs", "\t$dst, fpscr",
+ [(set GPR:$dst, (int_arm_get_fpscr))]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
- "\tfpscr, $src",
- [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
+ "vmsr", "\tfpscr, $src",
+ [(int_arm_set_fpscr GPR:$src)]> {
let Inst{27-20} = 0b11101110;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-} // neverHasSideEffects
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f80e316d23e8..2b7645a42119 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace {
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
- if (isAM4 && Offset == 4) {
- if (isThumb2)
- // Thumb2 does not support ldmib / stmib.
- return false;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
Mode = ARM_AM::ib;
- } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
- if (isThumb2)
- // Thumb2 does not support ldmda / stmda.
- return false;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
Mode = ARM_AM::da;
- } else if (isAM4 && Offset == -4 * (int)NumRegs) {
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ else if (Offset != 0) {
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
@@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
BaseKill = true; // New base is always killed right after its use.
}
- bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode);
- MachineInstrBuilder MIB = (isAM4)
- ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
- : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
- .addImm(Pred).addReg(PredReg);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
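
The offset checks in MergeOps above choose the load/store-multiple submode for the i32 (non-VFP) case; a C++ sketch of the same decision, assuming 4-byte registers (enum and function names are illustrative):

    enum class SubMode { ia, ib, da, db, needs_new_base };

    SubMode pick_submode(int offset, int num_regs, bool is_thumb2) {
      if (offset == 4 && !is_thumb2)
        return SubMode::ib;                    // increment before
      if (offset == -4 * num_regs + 4 && !is_thumb2)
        return SubMode::da;                    // decrement after
      if (offset == -4 * num_regs)
        return SubMode::db;                    // decrement before
      if (offset != 0)
        return SubMode::needs_new_base;        // materialize a new base first
      return SubMode::ia;                      // increment after (default)
    }
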
@@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
if (KilledRegs.count(Reg)) {
unsigned j = Killer[Reg];
memOps[j].MBBI->getOperand(0).setIsKill(false);
+ memOps[j].isKill = false;
}
}
MBB.erase(memOps[i].MBBI);
@@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned insertAfter = SIndex;
@@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(Reg);
- // AM4 - register numbers in ascending order.
- // AM5 - consecutive register numbers in ascending order.
- // Can only do up to 16 double-word registers per insn.
+ // Register numbers must be in ascending order. For VFP, the registers
+ // must also be consecutive and there is a limit of 16 double-word
+ // registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum)
+ ((isNotVFP && RegNum > PRegNum)
|| ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
@@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
return false;
// Make sure the offset fits in 8 bits.
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
@@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
MI->getOpcode() != ARM::ADDri)
return false;
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
@@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
- return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMS:
case ARM::VSTMS:
+ return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMD:
case ARM::VSTMD:
- return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ return (MI->getNumOperands() - 4) * 8;
}
}
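
With the addressing-mode immediate gone from the size computation, the transfer size comes purely from the operand count: the first four operands are the base register, mode immediate, predicate and predicate register, and everything after that is the register list. A small sketch with a worked example (names are illustrative):

    // e.g. a VLDMD of d0-d3 has 8 operands: (8 - 4) * 8 = 32 bytes.
    unsigned transfer_size_bytes(unsigned num_operands, bool is_double_regs) {
      return (num_operands - 4) * (is_double_regs ? 8u : 4u);
    }
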
@@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
- Opcode == ARM::STM || Opcode == ARM::t2STM);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- unsigned Offset = 0;
- if (isAM4) {
- // Can't use an updating ld/st if the base register is also a dest
- // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).getReg() == Base)
- return false;
- }
- Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- } else {
- // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
- Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
}
+ Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
- if (isAM4) {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::db;
- } else if (isAM4 && Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::da;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- }
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
@@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
- if (isAM4) {
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
}
if (DoMerge) {
if (NextMBBI == I) {
@@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(BaseKill));
- if (isAM4) {
- // [t2]LDM_UPD, [t2]STM_UPD
- MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
- .addImm(Pred).addReg(PredReg);
- } else {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
- MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
- .addImm(Pred).addReg(PredReg);
- }
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ .addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
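
As a worked example of the merge with a preceding decrement: "sub r0, r0, #16" followed by "ldmia r0, {r1, r2, r3, r4}" can fold into a single updating "ldmdb r0!, {r1, r2, r3, r4}", which loads the same four words and leaves r0 with the same final value.
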
@@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
- Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
- (isDPR ? 2 : 1));
+ Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
+ ARM_AM::db : ARM_AM::ia);
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
@@ -748,6 +708,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
MachineOperand &MO = MI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
@@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
namespace {
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetData *TD;
const TargetInstrInfo *TII;
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index ab2b06b60783..ab2b06b60783 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h
index b81a30690ce2..b81a30690ce2 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/ARMMCInstLower.h
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7e57a1ca5576..514c26b4daf0 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
/// LRSpilledForFarJump - True if the LR register has been spilled to
/// enable a far jump.
bool LRSpilledForFarJump;
@@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index d020f3c74bde..305b232e6a99 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // FP is R11, R9 is available.
- static const unsigned ARM_GPR_AO_1[] = {
+ static const unsigned ARM_GPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10,
- ARM::R11 };
- // FP is R11, R9 is not available.
- static const unsigned ARM_GPR_AO_2[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R10,
- ARM::R11 };
- // FP is R7, R9 is available as non-callee-saved register.
- // This is used by Darwin.
- static const unsigned ARM_GPR_AO_3[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R9, ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,ARM::R7 };
- // FP is R7, R9 is not available.
- static const unsigned ARM_GPR_AO_4[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,
- ARM::R7 };
- // FP is R7, R9 is available as callee-saved register.
- // This is used by non-Darwin platform in Thumb mode.
- static const unsigned ARM_GPR_AO_5[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 };
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
// For Thumb1 mode, we don't want to allocate hi regs at all, as we
// don't know how to spill them. If we make our prologue/epilogue code
@@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_4;
- else
- return ARM_GPR_AO_3;
- } else {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_2;
- else if (Subtarget.isThumb())
- return ARM_GPR_AO_5;
- else
- return ARM_GPR_AO_1;
- }
+ return ARM_GPR_AO;
}
GPRClass::iterator
GPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
- } else {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
- else if (Subtarget.isThumb())
- I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
- }
-
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
}
}];
}
-// Thumb registers are R0-R7 normally. Some instructions can still use
-// the general GPR register class above (MOV, e.g.)
-def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+// Restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviour when SP (R13)
+// or PC (R15) is used. The ARM ARM refers to such operands via the
+// BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R11, R12, LR]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- static const unsigned THUMB_tGPR_AO[] = {
+ static const unsigned ARM_rGPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_rGPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
- // FP is R7, only low registers available.
- tGPRClass::iterator
- tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return THUMB_tGPR_AO;
+ rGPRClass::iterator
+ rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO;
+ return ARM_rGPR_AO;
}
- tGPRClass::iterator
- tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ rGPRClass::iterator
+ rGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- tGPRClass::iterator I =
- THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
+ return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
}
}];
}
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+
// For tail calls, we can't use callee-saved registers, as they are restored
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
@@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO_TC;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_NOR9_TC;
- else
- return ARM_GPR_R9_TC;
- } else
- // R9 is either callee-saved or reserved; can't use it.
- return ARM_GPR_NOR9_TC;
+ return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
}
tcGPRClass::iterator
tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
- return I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- else
- I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
- } else
- // R9 is either callee-saved or reserved; can't use it.
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- return I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+
+ return Subtarget.isTargetDarwin() ?
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
+ ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
}
}];
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 10fd257055fb..cb539f4c01ec 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
, SlowVMLx(false)
+ , SlowFPBrcc(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
+ , NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, UseMovt(UseMOVT)
, HasFP16(false)
, HasHardwareDivide(false)
, HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , FPOnlySP(false)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e7d92ede9b98..67e58038ee77 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
+ V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
};
enum ARMFPEnum {
@@ -63,6 +63,9 @@ protected:
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -84,6 +87,18 @@ protected:
/// instructions.
bool HasT2ExtractPack;
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -128,6 +143,8 @@ protected:
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool hasARMOps() const { return !NoARM; }
+
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
bool hasVFP3() const { return ARMFPUType >= VFPv3; }
bool hasNEON() const { return ARMFPUType >= NEON; }
@@ -135,8 +152,11 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 09203f9304df..30ff8276cdaa 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
-
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
"v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
TSInfo(*this) {
}
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+ return false;
+}
-// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createARMISelDag(*this, OptLevel));
@@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2())
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
PM.add(createThumb2SizeReductionPass());
PM.add(createARMConstantIslandPass());
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a222e57b13ff..17e5425a9d37 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -50,6 +50,7 @@ public:
}
// Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4b083244b241..75e2a739bf1f 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMSubtarget.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -18,8 +19,10 @@
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -37,6 +40,7 @@ enum ShiftType {
class ARMAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
private:
MCAsmParser &getParser() const { return Parser; }
@@ -76,26 +80,33 @@ private:
bool ParseDirectiveSyntax(SMLoc L);
- // TODO - For now hacked versions of the next two are in here in this file to
- // allow some parser testing until the table gen versions are implemented.
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) {
+ if (!MatchInstructionImpl(Operands, Inst))
+ return false;
+
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+
+ return true;
+ }
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
- /// MatchRegisterName - Match the given string to a register name and return
- /// its register number, or -1 if there is no match. To allow return values
- /// to be used directly in register lists, arm registers have values between
- /// 0 and 15.
- int MatchRegisterName(StringRef Name);
+ unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
+
+ bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
+ &Operands,
+ MCInst &Inst);
/// }
public:
- ARMAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -110,16 +121,21 @@ private:
ARMOperand() {}
public:
enum KindTy {
- Token,
- Register,
+ CondCode,
Immediate,
- Memory
+ Memory,
+ Register,
+ Token
} Kind;
SMLoc StartLoc, EndLoc;
union {
struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
const char *Data;
unsigned Length;
} Tok;
@@ -151,16 +167,19 @@ public:
};
- ARMOperand(KindTy K, SMLoc S, SMLoc E)
- : Kind(K), StartLoc(S), EndLoc(E) {}
+ //ARMOperand(KindTy K, SMLoc S, SMLoc E)
+ // : Kind(K), StartLoc(S), EndLoc(E) {}
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
case Token:
- Tok = o.Tok;
+ Tok = o.Tok;
break;
case Register:
Reg = o.Reg;
@@ -179,6 +198,11 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -194,15 +218,50 @@ public:
return Imm.Val;
}
- bool isToken() const {return Kind == Token; }
+ bool isCondCode() const { return Kind == CondCode; }
+
+ bool isImm() const { return Kind == Immediate; }
bool isReg() const { return Kind == Register; }
+ bool isToken() const {return Kind == Token; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ // FIXME: What belongs here?
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ virtual void dump(raw_ostream &OS) const;
+
+ static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
+ SMLoc S) {
+ Op.reset(new ARMOperand);
+ Op->Kind = CondCode;
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ }
+
static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
SMLoc S) {
Op.reset(new ARMOperand);
@@ -262,6 +321,33 @@ public:
} // end anonymous namespace.
+void ARMOperand::dump(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << ARMCondCodeToString(getCondCode());
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Memory:
+ OS << "<memory>";
+ break;
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name a Reg operand is created, the token is eaten
/// and false is returned. Else true is returned and no token is eaten.
@@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
return false;
}
-/// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(StringRef Name) {
- if (Name == "r0" || Name == "R0")
- return 0;
- else if (Name == "r1" || Name == "R1")
- return 1;
- else if (Name == "r2" || Name == "R2")
- return 2;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r4" || Name == "R4")
- return 4;
- else if (Name == "r5" || Name == "R5")
- return 5;
- else if (Name == "r6" || Name == "R6")
- return 6;
- else if (Name == "r7" || Name == "R7")
- return 7;
- else if (Name == "r8" || Name == "R8")
- return 8;
- else if (Name == "r9" || Name == "R9")
- return 9;
- else if (Name == "r10" || Name == "R10")
- return 10;
- else if (Name == "r11" || Name == "R11" || Name == "fp")
- return 11;
- else if (Name == "r12" || Name == "R12" || Name == "ip")
- return 12;
- else if (Name == "r13" || Name == "R13" || Name == "sp")
- return 13;
- else if (Name == "r14" || Name == "R14" || Name == "lr")
- return 14;
- else if (Name == "r15" || Name == "R15" || Name == "pc")
- return 15;
- return -1;
-}
-
-/// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::
-MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst) {
- ARMOperand &Op0 = *(ARMOperand*)Operands[0];
- assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
- StringRef Mnemonic = Op0.getToken();
- if (Mnemonic == "add" ||
- Mnemonic == "stmfd" ||
- Mnemonic == "str" ||
- Mnemonic == "ldmfd" ||
- Mnemonic == "ldr" ||
- Mnemonic == "mov" ||
- Mnemonic == "sub" ||
- Mnemonic == "bl" ||
- Mnemonic == "push" ||
- Mnemonic == "blx" ||
- Mnemonic == "pop") {
- // Hard-coded to a valid instruction, till we have a real matcher.
- Inst = MCInst();
- Inst.setOpcode(ARM::MOVr);
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateReg(0));
- return false;
- }
-
- return true;
-}
-
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
@@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
OwningPtr<ARMOperand> Op;
- ARMOperand::CreateToken(Op, Name, NameLoc);
-
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Determine the predicate, if any.
+ //
+ // FIXME: We need a way to check whether a prefix supports predication,
+ // otherwise we will end up with an ambiguity for instructions that happen to
+ // end with a predicate name.
+ unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Head = Head.slice(0, Head.size() - 2);
+ } else
+ CC = ARMCC::AL;
+
+ ARMOperand::CreateToken(Op, Head, NameLoc);
Operands.push_back(Op.take());
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
+ Operands.push_back(Op.take());
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ Head = Name.slice(Start, Next);
+ ARMOperand::CreateToken(Op, Head, NameLoc);
+ Operands.push_back(Op.take());
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
OwningPtr<ARMOperand> Op;
if (ParseOperand(Op)) return true;
@@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() {
RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
LLVMInitializeARMAsmLexer();
}
+
+#include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index edc934549b28..8026e7718ca9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
O << '\t' << "vpush";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
O << '\t' << "vpop";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << ' ';
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
assert(0 && "FIXME: Implement printAddrModePCOperand");
}
-void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = CountTrailingZeros_32(v);
@@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
O << '#' << lsb << ", #" << width;
}
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "{";
@@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index ddf5047793d2..e5ad0d07e9ba 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -57,6 +57,8 @@ public:
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
index 4e299f86ecb6..18645c0864a3 100644
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -1,8 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmPrinter
- ARMAsmPrinter.cpp
ARMInstPrinter.cpp
- ARMMCInstLower.cpp
)
add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 0df34666b959..6b4dee5965d2 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums)
tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
tablegen(ARMGenCodeEmitter.inc -gen-emitter)
tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenFastISel.inc -gen-fast-isel)
tablegen(ARMGenCallingConv.inc -gen-callingconv)
tablegen(ARMGenSubtarget.inc -gen-subtarget)
tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
add_llvm_target(ARMCodeGen
+ ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
+ ARMFastISel.cpp
+ ARMGlobalMerge.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCAsmInfo.cpp
+ ARMMCInstLower.cpp
ARMRegisterInfo.cpp
+ ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
@@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
- ARMSelectionDAGInfo.cpp
)
-target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
+target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4de697e8bf67..e22028985b46 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
/// ARMDecoderEmitter.cpp TableGen backend. It contains:
///
@@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
return ARM::BFI;
}
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+  // As a result, the decoder fails to decode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+
// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
// As a result, the decoder fails to decode UMULL properly.
if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
@@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
// Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
  // As a result, the decoder fails to decode SSAT properly.
if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
- return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
+ return ARM::SSAT;
// Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
// As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
@@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
/// decodeInstruction(insn) is invoked on the original insn.
///
/// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
if (IsThumb2) {
uint16_t op1 = slice(insn, 28, 27);
uint16_t op2 = slice(insn, 26, 20);
@@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
// passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
// halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
// the top half followed by the second halfword.
- uint32_t insn = 0;
+ unsigned insn = 0;
// Possible second halfword.
uint16_t insn1 = 0;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index a07ff2832aa7..9f493b9aee02 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
/// describing the operand info for each ARMInsts[i].
@@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
RegClassID = ARM::DPRRegClassID;
}
+ // For this purpose, we can treat rGPR as if it were GPR.
+ if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+
// See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
unsigned RegNum =
RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
@@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
//
// A8-11: DecodeImmShift()
static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
- // If type == 0b11 and imm5 == 0, we have an rrx, instead.
- if (ShOp == ARM_AM::ror && ShImm == 0)
- ShOp = ARM_AM::rrx;
- // If (lsr or asr) and imm5 == 0, shift amount is 32.
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0)
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
}
// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
@@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7)
- return true;
-
assert(0 && "Unexpected pseudo instruction!");
return false;
}
@@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- assert(0 && "Unexpected BrMiscFrm Opcode");
return false;
}
@@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
return true;
}
-static inline bool SaturateOpcode(unsigned Opcode) {
- switch (Opcode) {
- case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16:
- case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16:
- return true;
- default:
- return false;
- }
-}
-
-static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::SSATlsl:
- case ARM::SSATasr:
- return slice(insn, 20, 16) + 1;
- case ARM::SSAT16:
- return slice(insn, 19, 16) + 1;
- case ARM::USATlsl:
- case ARM::USATasr:
- return slice(insn, 20, 16);
- case ARM::USAT16:
- return slice(insn, 19, 16);
- default:
- assert(0 && "Invalid opcode passed in");
- return 0;
- }
-}
-
// A major complication is the fact that some of the saturating add/subtract
// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
// They are QADD, QDADD, QDSUB, and QSUB.
@@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpIdx >= NumOps)
return false;
- // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd.
- if (SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rm) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 11, 7);
-
- // A8.6.183. Possible ASR shift amount of 32...
- if (Opcode == ARM::SSATasr && ShAmt == 0)
- ShAmt = 32;
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
// Special-case handling of BFC/BFI/SBFX/UBFX.
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
- MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
- : getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
+ ++OpIdx;
+ }
uint32_t mask = 0;
if (!getBFCInvMask(insn, mask))
return false;
@@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
- MI.addOperand(MCOperand::CreateImm(ShiftAmt));
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+    if (Opcode == ARM::PKHBT)
+      Opc = ARM_AM::lsl;
+    else if (Opcode == ARM::PKHTB)
+      Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
++OpIdx;
}
return true;
}
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
// Extend instructions.
// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
// The 2nd operand register is Rn and the 3rd operand register is Rm for the
@@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
- bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false;
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd for operand 0.
@@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VFP Load/Store Multiple Instructions.
// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is
+// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
// followed by a reglist of either DPR(s) or SPR(s).
//
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
@@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(Base));
- // Next comes the AM5 Opcode.
+ // Next comes the AM4 Opcode.
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
// Must be either "ia" or "db" submode.
if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
- DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
+ DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
return false;
}
-
- unsigned char Imm8 = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
// Handling the two predicate operands before the reglist.
int64_t CondVal = insn >> ARMII::CondShift;
@@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx += 4;
bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
- Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false;
+ Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd.
unsigned RegD = decodeVFPRd(insn, isSPVFP);
// Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
for (unsigned i = 0; i < Regs; ++i) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
@@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// We have homogeneous NEON registers for Load/Store.
unsigned RegClass = 0;
+ bool DRegPair = UseDRegPair(Opcode);
// Double-spaced registers have increments of 2.
- unsigned Inc = DblSpaced ? 2 : 1;
+ unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
unsigned Rn = decodeRn(insn);
unsigned Rm = decodeRm(insn);
@@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[OpIdx].RegClass;
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2312,8 +2311,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2351,6 +2349,11 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
return true;
}
@@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
break;
case ARM::VMOVv4i16:
case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
esize = ESize16;
break;
case ARM::VMOVv2i32:
case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
esize = ESize32;
break;
case ARM::VMOVv1i64:
@@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.49 ISB
static inline bool MemBarrierInstr(uint32_t insn) {
unsigned op7_4 = slice(insn, 7, 4);
- if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6))
+ if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
return true;
return false;
@@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- if (MemBarrierInstr(insn))
+ if (MemBarrierInstr(insn)) {
+    // Unlike the old Int_MemBarrierV7 / Int_SyncBarrierV7 pseudos, which had no
+    // operands, the barrier variants carry their option field as an operand.
+ //
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
return true;
+ }
switch (Opcode) {
case ARM::CLREX:
@@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleLdStMulFrm,
&DisassembleLdStExFrm,
&DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
&DisassembleExtFrm,
&DisassembleVFPUnaryFrm,
&DisassembleVFPBinaryFrm,
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 7d21256a14f9..9c30d332d1f2 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -23,7 +23,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
@@ -53,36 +54,35 @@ public:
ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_EXTFRM, 13) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \
- ENTRY(ARM_FORMAT_THUMBFRM, 24) \
- ENTRY(ARM_FORMAT_NEONFRM, 25) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \
- ENTRY(ARM_FORMAT_MISCFRM, 29) \
- ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \
- ENTRY(ARM_FORMAT_NLdSt, 31) \
- ENTRY(ARM_FORMAT_N1RegModImm, 32) \
- ENTRY(ARM_FORMAT_N2Reg, 33) \
- ENTRY(ARM_FORMAT_NVCVT, 34) \
- ENTRY(ARM_FORMAT_NVecDupLn, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 36) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 37) \
- ENTRY(ARM_FORMAT_N3Reg, 38) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 39) \
- ENTRY(ARM_FORMAT_NVecExtract, 40) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
- ENTRY(ARM_FORMAT_NVTBL, 42)
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
// ARM instruction format specifies the encoding used by the instruction.
#define ENTRY(n, v) n = v,
@@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
}
/// Utility function for setting [From, To] bits to Val for a uint32_t.
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
- uint32_t Val) {
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
assert(From < 32 && To < 32 && From >= To);
uint32_t Mask = ((1 << (From - To + 1)) - 1);
Bits &= ~(Mask << To);
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b7a0bf6fdb9..112817b13cf9 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) {
}
static inline bool IsGPR(unsigned RegClass) {
- return RegClass == ARM::GPRRegClassID;
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
}
// Utilities for 32-bit Thumb instructions.
@@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
switch (bits2) {
default: assert(0 && "No such value");
case 0:
- ShOp = ARM_AM::lsl;
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
return imm5;
case 1:
ShOp = ARM_AM::lsr;
@@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass < 0
&& OpInfo[3].RegClass < 0
- && "Exactlt 4 operands expect and first two as reg operands");
+         && "Exactly 4 operands expected and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
@@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first two as reg operands");
- bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID);
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the constant shift specifier.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, OpInfo[0].RegClass,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
} else if (!NoDstReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
decodeRn(insn))));
++OpIdx;
} else {
@@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeRm(insn))));
++OpIdx;
@@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned imm5 = getShiftAmtBits(insn);
ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
-
- // PKHBT/PKHTB are special in that we need the decodeImmShift() call to
- // decode the shift amount from raw imm5 and bits2, but we DO NOT need
- // to encode the ShOp, as it's in the asm string already.
- if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB)
- MI.addOperand(MCOperand::CreateImm(ShAmt));
- else
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
}
++OpIdx;
}
@@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the modified immediate.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, RdRegClassID,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
return false;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
switch (Opcode) {
- case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16:
- case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16:
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
return true;
default:
return false;
}
}
-static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::t2SSATlsl:
- case ARM::t2SSATasr:
- return slice(insn, 4, 0) + 1;
- case ARM::t2SSAT16:
- return slice(insn, 3, 0) + 1;
- case ARM::t2USATlsl:
- case ARM::t2USATasr:
- return slice(insn, 4, 0);
- case ARM::t2USAT16:
- return slice(insn, 3, 0);
- default:
- assert(0 && "Unexpected opcode");
- return 0;
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
}
+ return true;
}
// A6.3.3 Data-processing (plain binary immediate)
@@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
// o t2SBFX (SBFX): Rs Rn lsb width
// o t2UBFX (UBFX): Rs Rn lsb width
// o t2BFI (BFI): Rs Rn lsb width
-//
-// [Signed|Unsigned] Saturate [16]
-//
-// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt
-// o t2SSAT16, t2USAT16: Rs sat_pos Rn
static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
// Build the register operand(s), followed by the immediate(s).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
decodeRs(insn))));
++OpIdx;
- // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd.
- if (Thumb2SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rn) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{14-12:7-6} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
if (TwoReg) {
assert(NumOps >= 3 && "Expect >= 3 operands");
int Idx;
@@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MI.getOperand(Idx));
} else {
// Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
}
++OpIdx;
}
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC) {
+ else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@@ -1575,17 +1575,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
return false;
} else {
// Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
- Opcode == ARM::t2BFI) && "Unexpected opcode");
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- if (Opcode == ARM::t2BFI) {
- if (getMsb(insn) < getLsb(insn)) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
- } else
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
++OpIdx;
}
@@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
// A8.6.26
// t2BXJ -> Rn
//
-// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants),
-// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control: t2DMBsy (and its t2DMB variants),
+// t2DSBsy (and its t2DSB variants), t2ISBsy, t2CLREX
// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
//
// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
"Expect >= 2 operands and first two as reg operands");
// Build the register operands, followed by the optional rotation amount.
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
// Build the register operands.
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
NumOpsAdded = FourReg ? 4 : 3;
@@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
// Build the register operands.
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
@@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
break;
case 2:
if (op == 0) {
- if (slice(op2, 5, 5) == 0) {
+ if (slice(op2, 5, 5) == 0)
// Data-processing (modified immediate)
return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
B);
- } else {
- // Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- } else {
- // Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
- break;
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
case 3:
switch (slice(op2, 6, 5)) {
case 0:
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 9e3ff29e07c4..b3fcfaf6bda7 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -14,10 +14,11 @@ TARGET = ARM
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
- ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenDecoderTables.inc ARMGenEDInfo.inc
+ ARMGenDecoderTables.inc ARMGenEDInfo.inc \
+ ARMGenFastISel.inc
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index bbdd3c7f7c3e..97e54bfaed9e 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted");
namespace {
struct NEONMoveFixPass : public MachineFunctionPass {
static char ID;
- NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index f67717cdd56f..3407ac6fe08e 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -23,7 +23,7 @@ namespace {
public:
static char ID;
- NEONPreAllocPass() : MachineFunctionPass(&ID) {}
+ NEONPreAllocPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
@@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- FirstOpnd = 0;
- NumRegs = 4;
- return true;
-
case ARM::VLD2LNq16:
case ARM::VLD2LNq32:
FirstOpnd = 0;
@@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD3d8:
- case ARM::VLD3d16:
- case ARM::VLD3d32:
- case ARM::VLD1d64T:
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
@@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VLD3q8_UPD:
- case ARM::VLD3q16_UPD:
- case ARM::VLD3q32_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD3q8odd_UPD:
- case ARM::VLD3q16odd_UPD:
- case ARM::VLD3q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD3LNq16:
case ARM::VLD3LNq32:
FirstOpnd = 0;
@@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD4d8:
- case ARM::VLD4d16:
- case ARM::VLD4d32:
- case ARM::VLD1d64Q:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
@@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VLD4q8_UPD:
- case ARM::VLD4q16_UPD:
- case ARM::VLD4q32_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD4q8odd_UPD:
- case ARM::VLD4q16odd_UPD:
- case ARM::VLD4q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD4LNq16:
case ARM::VLD4LNq32:
FirstOpnd = 0;
@@ -178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;
@@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST3d8:
- case ARM::VST3d16:
- case ARM::VST3d32:
- case ARM::VST1d64T:
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
@@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VST3q8_UPD:
- case ARM::VST3q16_UPD:
- case ARM::VST3q32_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST3q8odd_UPD:
- case ARM::VST3q16odd_UPD:
- case ARM::VST3q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST3LNq16:
case ARM::VST3LNq32:
FirstOpnd = 2;
@@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST4d8:
- case ARM::VST4d16:
- case ARM::VST4d32:
- case ARM::VST1d64Q:
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
@@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VST4q8_UPD:
- case ARM::VST4q16_UPD:
- case ARM::VST4q32_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST4q8odd_UPD:
- case ARM::VST4q16odd_UPD:
- case ARM::VST4q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST4LNq16:
case ARM::VST4LNq32:
FirstOpnd = 2;
@@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
continue;
if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
- llvm_unreachable("expected a REG_SEQUENCE");
+
+ MachineBasicBlock::iterator NextI = llvm::next(MBBI);
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ // For now, just assign a fixed set of adjacent registers.
+ // This leaves plenty of room for future improvements.
+ static const unsigned NEONDRegs[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7
+ };
+ MO.setReg(NEONDRegs[Offset + R * Stride]);
+
+ if (MO.isUse()) {
+ // Insert a copy from VirtReg.
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg())
+ .addReg(VirtReg, getKillRegState(MO.isKill()));
+ MO.setIsKill();
+ } else if (MO.isDef() && !MO.isDead()) {
+ // Add a copy to VirtReg.
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg)
+ .addReg(MO.getReg());
+ }
+ }
}
return Modified;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0cb8ff01181d..9fc3fb92cb2c 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that.
//===---------------------------------------------------------------------===//
-Given the following on ARMv7:
-int test1(int A, int B) {
- return (A&-8388481)|(B&8388480);
-}
-
-We currently generate:
- bfc r0, #7, #16
- movw r2, #:lower16:8388480
- movt r2, #:upper16:8388480
- and r1, r1, r2
- orr r0, r1, r0
- bx lr
-
-The following is much shorter:
- lsr r1, r1, #7
- bfi r0, r1, #7, #16
- bx lr
-
-
-//===---------------------------------------------------------------------===//
-
The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
int a(int x) { return __builtin_bswap32(x); }
@@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic
version.
//===---------------------------------------------------------------------===//
+
+Consider the following simple C code:
+
+void foo(unsigned char *a, unsigned char *b, int *c) {
+ if ((*a | *b) == 0) *c = 0;
+}
+
+Currently llvm-gcc generates something like this (nice branchless code, I'd say):
+
+ ldrb r0, [r0]
+ ldrb r1, [r1]
+ orr r0, r1, r0
+ tst r0, #255
+ moveq r0, #0
+ streq r0, [r2]
+ bx lr
+
+Note that both "tst" and "moveq" are redundant.
+
+//===---------------------------------------------------------------------===//
+
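For reference, the tighter sequence that note is presumably aiming for, a sketch only, assuming the orr can be turned into a flag-setting orrs so the conditional store keys directly off its result:

  ldrb    r0, [r0]
  ldrb    r1, [r1]
  orrs    r0, r1, r0
  streq   r0, [r2]
  bx      lr

Whenever the Z flag is set here r0 is already zero, so neither the tst nor the moveq is needed.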
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 39b70b43b23f..a21a3da10bda 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
MI.RemoveOperand(Op);
}
-int Thumb1RegisterInfo::
-rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const
-{
- // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo
- // version then can pull out Thumb1 specific parts here
- return 0;
-}
-
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
- // Thumb1 can't use the emergency spill slot on the stack because
- // ldr/str immediate offsets must be positive, and if we're referencing
- // off the frame pointer (if, for example, there are alloca() calls in
- // the function, the offset will be negative. Use R12 instead since that's
- // a call clobbered register that we know won't be used in Thumb1 mode.
- DebugLoc DL;
- BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
-
- // The UseMI is where we would like to restore the register. If there's
- // interference with R12 before then, however, we'll need to restore it
- // before that instead and adjust the UseMI.
- bool done = false;
- for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
- if (II->isDebugValue())
- continue;
- // If this instruction affects R12, adjust our restore point.
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = II->getOperand(i);
- if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- if (MO.getReg() == ARM::R12) {
- UseMI = II;
- done = true;
- break;
- }
- }
- }
- // Restore the register from R12
- BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
-
- return true;
-}
-
-unsigned
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
- unsigned VReg = 0;
- unsigned i = 0;
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
-
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
- // There are alloca()'s in this function, must reference off the frame
- // pointer instead.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- }
-
- // Special handling of dbg_value instructions.
- if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
- }
-
unsigned Opcode = MI.getOpcode();
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
if (Opcode == ARM::tADDrSPi) {
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
// Can't use tADDrSPi if it's based off the frame pointer.
unsigned NumBits = 0;
@@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(i+1);
- while (MI.getNumOperands() > i+1 &&
- (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm()));
- return 0;
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1 &&
+ (!MI.getOperand(FrameRegIdx+1).isReg() ||
+ !MI.getOperand(FrameRegIdx+1).isImm()));
+ return true;
}
// Common case: small offset, fits into instruction.
@@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
}
- return 0;
+ return true;
}
unsigned DestReg = MI.getOperand(0).getReg();
@@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
*this, dl);
MBB.erase(II);
- return 0;
+ return true;
}
if (Offset > 0) {
@@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Mask);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = llvm::next(II);
@@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
if (Opcode == ARM::tADDi3) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
}
- return 0;
+ return true;
} else {
unsigned ImmIdx = 0;
int InstrOffs = 0;
@@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Scale = 1;
switch (AddrMode) {
case ARMII::AddrModeT1_s: {
- ImmIdx = i+1;
+ ImmIdx = FrameRegIdx+1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = (FrameReg == ARM::SP) ? 8 : 5;
Scale = 4;
@@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Mask = (1 << NumBits) - 1;
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with sp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
- return 0;
+ return true;
}
bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
@@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset &= ~(Mask*Scale);
}
}
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+  assert(Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+  // the function), the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
+ addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+    // There are alloca()'s in this function; we must reference off the frame
+    // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+    MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert(Offset && "This code isn't needed if offset already handled!");
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
@@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(0, false));
} else if (Desc.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
- assert (Value && "Frame index virtual allocated, but Value arg is NULL!");
bool UseRR = false;
- bool TrackVReg = true;
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
if (Opcode == ARM::tSpill) {
if (FrameReg == ARM::SP)
@@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
UseRR = true;
- TrackVReg = false;
}
} else
emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
@@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
else // tSTR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false));
- if (!ReuseFrameIndexVals || !TrackVReg)
- VReg = 0;
} else
assert(false && "Unexpected opcode!");
@@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
- return VReg;
}
void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
@@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+  // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
+ AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
@@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- if (hasFP(MF)) {
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9a0308afa20c..c578054a5d71 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,27 +38,27 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
- // could not be handled directly in MI.
- int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc,
- unsigned SUBriOpc) const;
-
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator &UseMI,
const TargetRegisterClass *RC,
unsigned Reg) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index cd15bbed9f23..45e693744b80 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -27,7 +27,7 @@ namespace {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI,
}
}
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
bool
Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
SmallSet<unsigned, 4> &Defs,
SmallSet<unsigned, 4> &Uses) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
- "Sub-register indices still around?");
- // llvm models select's as two-address instructions. That means a copy
- // is inserted before a t2MOVccr, etc. If the copy is scheduled in
- // between selects we would end up creating multiple IT blocks.
-
- // First check if it's safe to move it.
- if (Uses.count(DstReg) || Defs.count(SrcReg))
- return false;
-
- // Then peek at the next instruction to see if it's predicated on CC or OCC.
- // If not, then there is nothing to be gained by moving the copy.
- MachineBasicBlock::iterator I = MI; ++I;
- MachineBasicBlock::iterator E = MI->getParent()->end();
- while (I != E && I->isDebugValue())
- ++I;
- if (I != E) {
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
- if (NCC == CC || NCC == OCC)
- return true;
- }
+ if (!isCopy(MI))
+ return false;
+  // llvm models selects as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
}
return false;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index ee517279c9d7..442f41da8a2d 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ba392f36d946..0c3962dd123d 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -173,7 +173,7 @@ namespace {
char Thumb2SizeReduce::ID = 0;
}
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
return false;
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+ if (!isOK)
+ return false;
+
OpNum = 0;
isLdStMul = true;
break;
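The check added above encodes a quirk of the 16-bit LDM encoding: the narrow form only suppresses base-register writeback when the base register also appears in the register list. A rough illustration, assuming standard Thumb semantics and not tied to the exact opcode names used in this pass:

  ldm  r3, {r0, r3, r7}   @ base r3 is in the list: no writeback, same behaviour as the wide non-writeback form
  ldm  r3, {r0, r1, r7}   @ base r3 not in the list: the narrow encoding would write back to r3

Only the first kind can therefore be narrowed without changing behaviour.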
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
index 001656e0121a..376811709536 100644
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
namespace {
struct AlphaBSel : public MachineFunctionPass {
static char ID;
- AlphaBSel() : MachineFunctionPass(&ID) {}
+ AlphaBSel() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
index a6c6f52704f6..3aec07035d74 100644
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -34,7 +34,7 @@ namespace {
public:
static char ID;
- AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID),
+ AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID),
MCE(mce) {}
/// getBinaryCodeForInstr - This function, generated by the
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index d526dc0827b2..d197bd15ef9c 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -113,8 +113,8 @@ namespace {
static uint64_t getNearPower2(uint64_t x) {
if (!x) return 0;
unsigned at = CountLeadingZeros_64(x);
- uint64_t complow = 1 << (63 - at);
- uint64_t comphigh = 1 << (64 - at);
+ uint64_t complow = 1ULL << (63 - at);
+ uint64_t comphigh = 1ULL << (64 - at);
//cerr << x << ":" << complow << ":" << comphigh << "\n";
if (abs64(complow - x) <= abs64(comphigh - x))
return complow;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index ad625a269417..5a2f5610fdb4 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -27,32 +27,6 @@ AlphaInstrInfo::AlphaInstrInfo()
RI(*this) { }
-bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg, unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- unsigned oc = MI.getOpcode();
- if (oc == Alpha::BISr ||
- oc == Alpha::CPYSS ||
- oc == Alpha::CPYST ||
- oc == Alpha::CPYSSt ||
- oc == Alpha::CPYSTs) {
- // or r1, r2, r2
- // cpys(s|t) r1 r2 r2
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid Alpha BIS instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- SrcSR = DstSR = 0;
- return true;
- }
- }
- return false;
-}
-
unsigned
AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index e20e8323b64e..ee6077a4a01a 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -30,12 +30,6 @@ public:
///
virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
index 34be470f03e3..85fbfd1affe2 100644
--- a/lib/Target/Alpha/AlphaLLRP.cpp
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -39,7 +39,7 @@ namespace {
static char ID;
AlphaLLRPPass(AlphaTargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm) { }
virtual const char *getPassName() const {
return "Alpha NOP inserter";
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index dc9d935ec047..327ddb4d9a72 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -137,10 +137,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
//variable locals
//<- SP
-unsigned
+void
AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -185,7 +184,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
MI.getOperand(i).ChangeToImmediate(Offset);
}
- return 0;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index f9fd87a63737..b164979a6311 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -38,9 +38,8 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
//void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 9f4aff6fd5f5..5428cb96173b 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -53,8 +53,6 @@ namespace {
void printOp(const MachineOperand &MO, raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printBaseOffsetPair(const MachineInstr *MI, int i, raw_ostream &O,
- bool brackets=true);
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
void EmitStartOfAsmFile(Module &M);
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index a74d42d59549..e50d57a31b6e 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -28,34 +28,6 @@ BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
RI(ST, *this),
Subtarget(ST) {}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
- unsigned &DstReg,
- unsigned &SrcSR,
- unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
- switch (MI.getOpcode()) {
- case BF::MOVE:
- case BF::MOVE_ncccc:
- case BF::MOVE_ccncc:
- case BF::MOVECC_zext:
- case BF::MOVECC_nz:
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- case BF::SLL16i:
- if (MI.getOperand(2).getImm()!=0)
- return false;
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- default:
- return false;
- }
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index 6c3591707269..fdc1029da588 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -30,10 +30,6 @@ namespace llvm {
/// always be able to get register info as well (through this method).
virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 06e95de1587f..a51831263e90 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -190,10 +190,9 @@ static unsigned findScratchRegister(MachineBasicBlock::iterator II,
return Reg;
}
-unsigned
+void
BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -230,20 +229,20 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.setDesc(TII.get(isStore
? BF::STORE32p_uimm6m4
: BF::LOAD32p_uimm6m4));
- return 0;
+ return;
}
if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32fp_nimm7m4
: BF::LOAD32fp_nimm7m4));
MI.getOperand(FIPos+1).setImm(-Offset);
- return 0;
+ return;
}
if (isInt<18>(Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32p_imm18m4
: BF::LOAD32p_imm18m4));
- return 0;
+ return;
}
// Use RegScavenger to calculate proper offset...
MI.dump();
@@ -328,7 +327,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
llvm_unreachable("Cannot eliminate frame index");
break;
}
- return 0;
}
void BlackfinRegisterInfo::
@@ -344,10 +342,6 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
-void BlackfinRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-}
-
// Emit a prologue that sets up a stack frame.
// On function entry, R0-R2 and P0 may hold arguments.
// R3, P1, and P2 may be used as scratch registers
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index ead0b4a73c83..bb83c34f8003 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -51,15 +51,12 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index e8d8474b5be8..270fff6064ad 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -73,7 +73,7 @@ namespace {
public:
static char ID;
CBackendNameAllUsedStructsAndMergeFunctions()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<FindUsedTypes>();
}
@@ -110,7 +110,7 @@ namespace {
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
- : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
NextAnonValueNumber(0) {
FPCounter = 0;
@@ -199,7 +199,6 @@ namespace {
void lowerIntrinsics(Function &F);
- void printModule(Module *M);
void printModuleTypes(const TypeSymbolTable &ST);
void printContainedStructs(const Type *Ty, std::set<const Type *> &);
void printFloatingPointConstants(Function &F);
@@ -1300,6 +1299,13 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
}
std::string CWriter::GetValueName(const Value *Operand) {
+
+ // Resolve potential alias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+ if (const Value *V = GA->resolveAliasedGlobal(false))
+ Operand = V;
+ }
+
// Mangle globals with the standard mangler interface for LLC compatibility.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
SmallString<128> Str;
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index ec2f663908f6..04fa2ae866d6 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -1,4 +1,4 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A>
// Return Value Calling Convention
//===----------------------------------------------------------------------===//
-// Return-value convention for Cell SPU: Everything can be passed back via $3:
+// Return-value convention for Cell SPU: return values are passed in registers R3-R74
def RetCC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3]>>,
- CCIfType<[i16], CCAssignToReg<[R3]>>,
- CCIfType<[i32], CCAssignToReg<[R3]>>,
- CCIfType<[i64], CCAssignToReg<[R3]>>,
- CCIfType<[i128], CCAssignToReg<[R3]>>,
- CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
- CCIfType<[v2i32], CCAssignToReg<[R3]>>
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
]>;
@@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[
R39, R40, R41, R42, R43, R44, R45, R46, R47,
R48, R49, R50, R51, R52, R53, R54, R55, R56,
R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9b8c2ddd0635..2f1598441f5a 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -41,13 +41,6 @@ using namespace llvm;
namespace {
//! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
bool
- isI64IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
isI32IntS10Immediate(ConstantSDNode *CN)
{
return isInt<10>(CN->getSExtValue());
@@ -67,14 +60,6 @@ namespace {
return isInt<10>(CN->getSExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(SDNode *N)
- {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- return (CN != 0 && isI16IntS10Immediate(CN));
- }
-
//! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
bool
isI16IntU10Immediate(ConstantSDNode *CN)
@@ -82,14 +67,6 @@ namespace {
return isUInt<10>((short) CN->getZExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntU10Immediate(SDNode *N)
- {
- return (N->getOpcode() == ISD::Constant
- && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
- }
-
//! ConstantSDNode predicate for signed 16-bit values
/*!
\arg CN The constant SelectionDAG node holding the value
@@ -119,14 +96,6 @@ namespace {
return false;
}
- //! SDNode predicate for signed 16-bit values.
- bool
- isIntS16Immediate(SDNode *N, short &Imm)
- {
- return (N->getOpcode() == ISD::Constant
- && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
- }
-
//! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
static bool
isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
@@ -142,16 +111,6 @@ namespace {
return false;
}
- bool
- isHighLow(const SDValue &Op)
- {
- return (Op.getOpcode() == SPUISD::IndirectAddr
- && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
- && Op.getOperand(1).getOpcode() == SPUISD::Lo)
- || (Op.getOperand(0).getOpcode() == SPUISD::Lo
- && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
- }
-
//===------------------------------------------------------------------===//
//! EVT to "useful stuff" mapping structure:
@@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
return true;
} else if (Opc == ISD::Register
||Opc == ISD::CopyFromReg
- ||Opc == ISD::UNDEF) {
+ ||Opc == ISD::UNDEF
+ ||Opc == ISD::Constant) {
unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ece19b9b89f6..46f31899be0c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
- // "Odd size" vector classes that we're willing to support:
- addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
-
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
@@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
if (alignment == 16) {
ConstantSDNode *CN;
-
// Special cases for a known aligned load to simplify the base pointer
// and insertion byte:
if (basePtr.getOpcode() == ISD::ADD
@@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
}
} else {
// Unaligned load: must be more pessimistic about addressing modes:
@@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
DAG.getConstant(0, PtrVT));
}
- // Re-emit as a v16i8 vector load
- alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ // Load the memory to which to store.
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
SN->getSrcValue(), SN->getSrcValueOffset(),
SN->isVolatile(), SN->isNonTemporal(), 16);
@@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
}
#endif
- SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- SDValue vectorizeOp =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
vectorizeOp, alignLoadVec,
@@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- MVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case MVT::i8:
- case MVT::i16:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
@@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
- case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
- }
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
@@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
@@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+      // elements are taken from V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
@@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// R1 ($sp) is used here only as it is guaranteed to have last bits zero
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2Elt, MVT::i32));
+ DAG.getConstant(V2EltOffset, MVT::i32));
SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
@@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
-
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
@@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
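The comment above this hunk describes the variable-index strategy: rotate the requested lane into slot 0, then replicate slot 0 across the vector. A minimal scalar sketch of that idea (plain C++, assuming a 4 x i32 vector; not the SPU lowering itself):

#include <array>
#include <cassert>

// Rotate the vector so that lane Idx ends up in slot 0.
static std::array<int, 4> rotateToSlot0(const std::array<int, 4> &V, unsigned Idx) {
  std::array<int, 4> R;
  for (unsigned i = 0; i != 4; ++i)
    R[i] = V[(i + Idx) % 4];
  return R;
}

int main() {
  std::array<int, 4> V = {10, 20, 30, 40};
  std::array<int, 4> R = rotateToSlot0(V, 2);
  assert(R[0] == 30);                                   // requested element now sits in slot 0
  std::array<int, 4> Splat = {R[0], R[0], R[0], R[0]};  // replicate slot 0 across the vector
  assert(Splat[3] == 30);
  return 0;
}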
@@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
// use 0 when the lane to insert to is 'undef'
- int64_t Idx=0;
+ int64_t Offset=0;
if (IdxOp.getOpcode() != ISD::UNDEF) {
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Idx = (CN->getSExtValue());
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
}
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Idx, PtrVT));
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, dl, VT,
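The insert-element lowering above places the value at a byte offset of index times element size and builds the shuffle mask in a type widened to the full 128-bit register. A small hedged arithmetic sketch (plain C++; 128 is the SPU quadword width, the element sizes are example values, not taken from the patch):

#include <cassert>

// Byte offset of lane Idx for an element of EltBits bits.
static unsigned laneByteOffset(unsigned Idx, unsigned EltBits) {
  return Idx * (EltBits / 8);
}

// Number of lanes in the 128-bit shuffle-mask type for a given element size.
static unsigned maskLanes(unsigned EltBits) {
  return 128 / EltBits;
}

int main() {
  assert(laneByteOffset(1, 64) == 8);  // v2f64, lane 1 starts 8 bytes in
  assert(maskLanes(64) == 2);          // full 128-bit vector: mask keeps 2 lanes
  assert(maskLanes(32) == 4);          // 64-bit "half vector" of f32: mask widened to 4 lanes
  return 0;
}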
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 69aa0887bd77..26d6b4f25ef1 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -54,148 +54,6 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
-bool
-SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- switch (MI.getOpcode()) {
- default:
- break;
- case SPU::ORIv4i32:
- case SPU::ORIr32:
- case SPU::ORHIv8i16:
- case SPU::ORHIr16:
- case SPU::ORHIi8i16:
- case SPU::ORBIv16i8:
- case SPU::ORBIr8:
- case SPU::ORIi16i32:
- case SPU::ORIi8i32:
- case SPU::AHIvec:
- case SPU::AHIr16:
- case SPU::AIv4i32:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::AIr32:
- assert(MI.getNumOperands() == 3 &&
- "wrong number of operands to AIr32");
- if (MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- (MI.getOperand(2).isImm() &&
- MI.getOperand(2).getImm() == 0)) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::LRr8:
- case SPU::LRr16:
- case SPU::LRr32:
- case SPU::LRf32:
- case SPU::LRr64:
- case SPU::LRf64:
- case SPU::LRr128:
- case SPU::LRv16i8:
- case SPU::LRv8i16:
- case SPU::LRv4i32:
- case SPU::LRv4f32:
- case SPU::LRv2i64:
- case SPU::LRv2f64:
- case SPU::ORv16i8_i8:
- case SPU::ORv8i16_i16:
- case SPU::ORv4i32_i32:
- case SPU::ORv2i64_i64:
- case SPU::ORv4f32_f32:
- case SPU::ORv2f64_f64:
- case SPU::ORi8_v16i8:
- case SPU::ORi16_v8i16:
- case SPU::ORi32_v4i32:
- case SPU::ORi64_v2i64:
- case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64:
-/*
- case SPU::ORi128_r64:
- case SPU::ORi128_f64:
- case SPU::ORi128_r32:
- case SPU::ORi128_f32:
- case SPU::ORi128_r16:
- case SPU::ORi128_r8:
-*/
- case SPU::ORi128_vec:
-/*
- case SPU::ORr64_i128:
- case SPU::ORf64_i128:
- case SPU::ORr32_i128:
- case SPU::ORf32_i128:
- case SPU::ORr16_i128:
- case SPU::ORr8_i128:
-*/
- case SPU::ORvec_i128:
-/*
- case SPU::ORr16_r32:
- case SPU::ORr8_r32:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORr32_r16:
- case SPU::ORr32_r8:
- case SPU::ORr16_r64:
- case SPU::ORr8_r64:
- case SPU::ORr64_r16:
- case SPU::ORr64_r8:
-*/
- case SPU::ORr64_r32:
- case SPU::ORr32_r64:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORf64_r64:
- case SPU::ORr64_f64: {
- assert(MI.getNumOperands() == 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid SPU OR<type>_<vec> or LR instruction!");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- break;
- }
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORr128:
- case SPU::ORf32:
- case SPU::ORf64:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid SPU OR(vec|r32|r64|gprc) instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- }
-
- return false;
-}
-
unsigned
SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index fbb173318148..191e55d0ca61 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,12 +32,6 @@ namespace llvm {
///
virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index a7fb14c26a76..ca0fe00e37f8 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadDFormVec<v4f32>;
def v2f64: LoadDFormVec<v2f64>;
- def v2i32: LoadDFormVec<v2i32>;
-
def r128: LoadDForm<GPRC>;
def r64: LoadDForm<R64C>;
def r32: LoadDForm<R32C>;
@@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadAFormVec<v4f32>;
def v2f64: LoadAFormVec<v2f64>;
- def v2i32: LoadAFormVec<v2i32>;
-
def r128: LoadAForm<GPRC>;
def r64: LoadAForm<R64C>;
def r32: LoadAForm<R32C>;
@@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadXFormVec<v4f32>;
def v2f64: LoadXFormVec<v2f64>;
- def v2i32: LoadXFormVec<v2i32>;
-
def r128: LoadXForm<GPRC>;
def r64: LoadXForm<R64C>;
def r32: LoadXForm<R32C>;
@@ -180,8 +174,6 @@ multiclass StoreDForms
def v4f32: StoreDFormVec<v4f32>;
def v2f64: StoreDFormVec<v2f64>;
- def v2i32: StoreDFormVec<v2i32>;
-
def r128: StoreDForm<GPRC>;
def r64: StoreDForm<R64C>;
def r32: StoreDForm<R32C>;
@@ -212,8 +204,6 @@ multiclass StoreAForms
def v4f32: StoreAFormVec<v4f32>;
def v2f64: StoreAFormVec<v2f64>;
- def v2i32: StoreAFormVec<v2i32>;
-
def r128: StoreAForm<GPRC>;
def r64: StoreAForm<R64C>;
def r32: StoreAForm<R32C>;
@@ -246,8 +236,6 @@ multiclass StoreXForms
def v4f32: StoreXFormVec<v4f32>;
def v2f64: StoreXFormVec<v2f64>;
- def v2i32: StoreXFormVec<v2i32>;
-
def r128: StoreXForm<GPRC>;
def r64: StoreXForm<R64C>;
def r32: StoreXForm<R32C>;
@@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>:
multiclass AddInstruction {
def v4i32: AVecInst<v4i32>;
def v16i8: AVecInst<v16i8>;
-
def r32: ARegInst<R32C>;
}
@@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
@@ -1448,6 +1436,9 @@ class ORCvtGPRCVec:
class ORCvtVecGPRC:
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+class ORCvtVecVec:
+ ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
multiclass BitwiseOr
{
def v16i8: ORVecInst<v16i8>;
@@ -3894,6 +3885,79 @@ multiclass SFPSub
defm FS : SFPSub;
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+
+
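The FNMS comment above works through res = -(a*b - c) = c - a*b. A tiny stand-alone check of that identity (plain C++, illustrative only, not SPU code):

#include <cassert>

static float fms(float a, float b, float c)  { return a * b - c; }     // fms: a*b - c
static float fnms(float a, float b, float c) { return -fms(a, b, c); } // fnms: -(a*b - c)

int main() {
  // -(2*3 - 10) == 10 - 2*3 == 4
  assert(fnms(2.0f, 3.0f, 10.0f) == 4.0f);
  assert(fnms(2.0f, 3.0f, 10.0f) == 10.0f - 2.0f * 3.0f);
  return 0;
}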
// Floating point reciprocal estimate
class FRESTInst<dag OOL, dag IOL>:
@@ -4019,72 +4083,6 @@ def FSCRRf32 :
// status and control register read
//--------------------------------------
-// Floating point multiply instructions
-//--------------------------------------
-
-def FMv4f32:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def FMf32 :
- RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Mulitply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-// = - (a * b - c)
-// = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
-
-//--------------------------------------
// Floating Point Conversions
// Signed conversions:
def CSiFv4f32:
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 6216651e48a4..e1a0358abc46 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{
return (N->getZExtValue() <= 0xff);
}]>;
-// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i64ImmSExt10 : PatLeaf<(imm), [{
- return isI64IntS10Immediate(N);
-}]>;
-
// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
// extended field. Used by RI10Form instructions like 'ldq'.
def i32ImmSExt10 : PatLeaf<(imm), [{
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index f7cfa42f2a95..cf718917a561 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
MBB.erase(I);
}
-unsigned
+void
SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value,
RegScavenger *RS) const
{
unsigned i = 0;
@@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
} else {
MO.ChangeToImmediate(Offset);
}
- return 0;
}
/// determineFrameLayout - Determine the size of the frame and maximum call
@@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
}
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
@@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// Mark effective beginning of when frame pointer is ready.
MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
@@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
dl = MBBI->getDebugLoc();
// Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL))
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
.addSym(MMI.getContext().CreateTempSymbol());
}
}
@@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
case SPU::LQDr32: return SPU::LQXr32;
case SPU::LQDr128: return SPU::LQXr128;
case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
case SPU::LQDv4f32: return SPU::LQXv4f32;
case SPU::STQDr32: return SPU::STQXr32;
case SPU::STQDr128: return SPU::STQXr128;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 7a6ae6d43c7e..aedb769cb4fc 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -63,9 +63,8 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
//! Convert frame indices into machine operands
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
//! Determine the frame's layout
void determineFrameLayout(MachineFunction &MF) const;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index bb88f2bf9a29..3e8f0979256a 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128,
// The SPU's registers as vector registers:
def VECREG : RegisterClass<"SPU",
- [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+ [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
128,
[
/* volatile register */
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 145568adcd4a..f08559f6e9f2 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -104,7 +104,7 @@ namespace {
public:
static char ID;
explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
virtual const char *getPassName() const { return "C++ backend"; }
@@ -288,6 +288,8 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkerPrivateLinkage"; break;
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "GlobalValue::AvailableExternallyLinkage "; break;
case GlobalValue::LinkOnceAnyLinkage:
@@ -471,14 +473,22 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(Nest);
HANDLE_ATTR(ReadNone);
HANDLE_ATTR(ReadOnly);
- HANDLE_ATTR(InlineHint);
HANDLE_ATTR(NoInline);
HANDLE_ATTR(AlwaysInline);
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
#undef HANDLE_ATTR
+ if (attrs & Attribute::StackAlignment)
+ Out << " | Attribute::constructStackAlignmentFromInt("
+ << Attribute::getStackAlignmentFromAttrs(attrs)
+ << ")";
+ attrs &= ~Attribute::StackAlignment;
assert(attrs == 0 && "Unhandled attribute!");
Out << ";";
nl(Out);
@@ -1404,7 +1414,8 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
}
Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), "
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params.begin(), "
<< iName << "_params.end(), \"";
} else if (call->getNumArgOperands() == 1) {
Out << "CallInst* " << iName << " = CallInst::Create("
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index b6e4d654f4aa..f4b30ad271f1 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -65,11 +65,8 @@ namespace {
void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
- void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
void printSavedRegsBitmask(raw_ostream &OS);
- const char *emitCurrentABIString();
void emitFrameDirective();
void printInstruction(const MachineInstr *MI, raw_ostream &O);
@@ -292,13 +289,6 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
printOperand(MI, opNum, O);
}
-void MBlazeAsmPrinter::
-printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier) {
- const MachineOperand& MO = MI->getOperand(opNum);
- O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm());
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmPrinter() {
RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 482ddd3963fb..3815b6d0a398 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -1,4 +1,4 @@
-//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===//
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td
index ddd49980e0a2..8622e0d74bcd 100644
--- a/lib/Target/MBlaze/MBlazeCallingConv.td
+++ b/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -1,4 +1,4 @@
-//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===//
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 42fea2507332..b551b79b291e 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "MBlaze Delay Slot Filler";
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index c7cd5f4e44a9..e64dd0e3e2c3 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -219,7 +219,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_immSExt16(CN)) {
+ if (isUInt<16>(CN->getZExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index a48a8c972353..657b1d4940a7 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index b59999e76ae5..51584111e666 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 7d655433d4e4..28e8e4402225 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===//
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 6ff5825a26b6..b590c090e095 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -30,41 +30,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool MBlazeInstrInfo::
-isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- // add $dst, $src, $zero || addu $dst, $zero, $src
- // or $dst, $src, $zero || or $dst, $zero, $src
- if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) {
- if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).isReg() &&
- MI.getOperand(2).getReg() == MBlaze::R0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- // addi $dst, $src, 0
- // ori $dst, $src, 0
- if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) {
- if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index f0743705f010..b3dba0ec768c 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -173,12 +173,6 @@ public:
///
virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 3c406dda0591..e5d153474a7e 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===//
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 82552fa4b343..a27cb5ba0dc4 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -17,17 +17,11 @@
// MBlaze intrinsic classes.
let TargetPrefix = "mblaze", isTarget = 1 in {
- class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty],
- [llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
- class MBFSL_Put_Intrinsic : Intrinsic<[],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
- class MBFSL_PutT_Intrinsic : Intrinsic<[],
- [llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 8cafa8c519c6..22b6a30470d1 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -242,9 +242,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
-unsigned MBlazeRegisterInfo::
+void MBlazeRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const {
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -277,7 +277,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MI.getOperand(oi).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
- return 0;
}
void MBlazeRegisterInfo::
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index af97b0e2d79e..1e1fde14ab7b 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -63,9 +63,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index d0a1e7556c43..5e935103389e 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===//
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 1fec9e694005..4a65542a447c 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===//
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt
deleted file mode 100644
index b1d47ef05ec5..000000000000
--- a/lib/Target/MSIL/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_target(MSIL
- MSILWriter.cpp
- )
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
deleted file mode 100644
index cc350e8a4f89..000000000000
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ /dev/null
@@ -1,1706 +0,0 @@
-//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This library converts LLVM code to MSIL code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSILWriter.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/Analysis/ConstantsScanner.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/Passes.h"
-using namespace llvm;
-
-namespace llvm {
- // TargetMachine for the MSIL
- struct MSILTarget : public TargetMachine {
- MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
- : TargetMachine(T) {}
-
- virtual bool addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
-
- virtual const TargetData *getTargetData() const { return 0; }
- };
-}
-
-extern "C" void LLVMInitializeMSILTarget() {
- // Register the target.
- RegisterTargetMachine<MSILTarget> X(TheMSILTarget);
-}
-
-bool MSILModule::runOnModule(Module &M) {
- ModulePtr = &M;
- TD = &getAnalysis<TargetData>();
- bool Changed = false;
- // Find named types.
- TypeSymbolTable& Table = M.getTypeSymbolTable();
- std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
- for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
- if (!I->second->isStructTy() && !I->second->isOpaqueTy())
- Table.remove(I++);
- else {
- std::set<const Type *>::iterator T = Types.find(I->second);
- if (T==Types.end())
- Table.remove(I++);
- else {
- Types.erase(T);
- ++I;
- }
- }
- }
- // Find unnamed types.
- unsigned RenameCounter = 0;
- for (std::set<const Type *>::const_iterator I = Types.begin(),
- E = Types.end(); I!=E; ++I)
- if (const StructType *STy = dyn_cast<StructType>(*I)) {
- while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy))
- ++RenameCounter;
- Changed = true;
- }
- // Pointer for FunctionPass.
- UsedTypes = &getAnalysis<FindUsedTypes>().getTypes();
- return Changed;
-}
-
-char MSILModule::ID = 0;
-char MSILWriter::ID = 0;
-
-bool MSILWriter::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- return false;
-
- LInfo = &getAnalysis<LoopInfo>();
- printFunction(F);
- return false;
-}
-
-
-bool MSILWriter::doInitialization(Module &M) {
- ModulePtr = &M;
- Out << ".assembly extern mscorlib {}\n";
- Out << ".assembly MSIL {}\n\n";
- Out << "// External\n";
- printExternals();
- Out << "// Declarations\n";
- printDeclarations(M.getTypeSymbolTable());
- Out << "// Definitions\n";
- printGlobalVariables();
- Out << "// Startup code\n";
- printModuleStartup();
- return false;
-}
-
-
-bool MSILWriter::doFinalization(Module &M) {
- return false;
-}
-
-
-void MSILWriter::printModuleStartup() {
- Out <<
- ".method static public int32 $MSIL_Startup() {\n"
- "\t.entrypoint\n"
- "\t.locals (native int i)\n"
- "\t.locals (native int argc)\n"
- "\t.locals (native int ptr)\n"
- "\t.locals (void* argv)\n"
- "\t.locals (string[] args)\n"
- "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n"
- "\tdup\n"
- "\tstloc\targs\n"
- "\tldlen\n"
- "\tconv.i4\n"
- "\tdup\n"
- "\tstloc\targc\n";
- printPtrLoad(TD->getPointerSize());
- Out <<
- "\tmul\n"
- "\tlocalloc\n"
- "\tstloc\targv\n"
- "\tldc.i4.0\n"
- "\tstloc\ti\n"
- "L_01:\n"
- "\tldloc\ti\n"
- "\tldloc\targc\n"
- "\tceq\n"
- "\tbrtrue\tL_02\n"
- "\tldloc\targs\n"
- "\tldloc\ti\n"
- "\tldelem.ref\n"
- "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::"
- "StringToHGlobalAnsi(string)\n"
- "\tstloc\tptr\n"
- "\tldloc\targv\n"
- "\tldloc\ti\n";
- printPtrLoad(TD->getPointerSize());
- Out <<
- "\tmul\n"
- "\tadd\n"
- "\tldloc\tptr\n"
- "\tstind.i\n"
- "\tldloc\ti\n"
- "\tldc.i4.1\n"
- "\tadd\n"
- "\tstloc\ti\n"
- "\tbr\tL_01\n"
- "L_02:\n"
- "\tcall void $MSIL_Init()\n";
-
- // Call user 'main' function.
- const Function* F = ModulePtr->getFunction("main");
- if (!F || F->isDeclaration()) {
- Out << "\tldc.i4.0\n\tret\n}\n";
- return;
- }
- bool BadSig = true;
- std::string Args("");
- Function::const_arg_iterator Arg1,Arg2;
-
- switch (F->arg_size()) {
- case 0:
- BadSig = false;
- break;
- case 1:
- Arg1 = F->arg_begin();
- if (Arg1->getType()->isIntegerTy()) {
- Out << "\tldloc\targc\n";
- Args = getTypeName(Arg1->getType());
- BadSig = false;
- }
- break;
- case 2:
- Arg1 = Arg2 = F->arg_begin(); ++Arg2;
- if (Arg1->getType()->isIntegerTy() &&
- Arg2->getType()->getTypeID() == Type::PointerTyID) {
- Out << "\tldloc\targc\n\tldloc\targv\n";
- Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
- BadSig = false;
- }
- break;
- default:
- BadSig = true;
- }
-
- bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
- if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) {
- Out << "\tldc.i4.0\n";
- } else {
- Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
- getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n";
- if (RetVoid)
- Out << "\tldc.i4.0\n";
- else
- Out << "\tconv.i4\n";
- }
- Out << "\tret\n}\n";
-}
-
-bool MSILWriter::isZeroValue(const Value* V) {
- if (const Constant *C = dyn_cast<Constant>(V))
- return C->isNullValue();
- return false;
-}
-
-
-std::string MSILWriter::getValueName(const Value* V) {
- std::string Name;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- Name = GV->getName();
- else {
- unsigned &No = AnonValueNumbers[V];
- if (No == 0) No = ++NextAnonValueNumber;
- Name = "tmp" + utostr(No);
- }
-
- // Name into the quotes allow control and space characters.
- return "'"+Name+"'";
-}
-
-
-std::string MSILWriter::getLabelName(const std::string& Name) {
- if (Name.find('.')!=std::string::npos) {
- std::string Tmp(Name);
- // Replace unaccepable characters in the label name.
- for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I)
- if (*I=='.') *I = '@';
- return Tmp;
- }
- return Name;
-}
-
-
-std::string MSILWriter::getLabelName(const Value* V) {
- std::string Name;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- Name = GV->getName();
- else {
- unsigned &No = AnonValueNumbers[V];
- if (No == 0) No = ++NextAnonValueNumber;
- Name = "tmp" + utostr(No);
- }
-
- return getLabelName(Name);
-}
-
-
-std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
- switch (CallingConvID) {
- case CallingConv::C:
- case CallingConv::Cold:
- case CallingConv::Fast:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) ";
- case CallingConv::X86_FastCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
- case CallingConv::X86_StdCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
- case CallingConv::X86_ThisCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) ";
- default:
- errs() << "CallingConvID = " << CallingConvID << '\n';
- llvm_unreachable("Unsupported calling convention");
- }
- return ""; // Not reached
-}
-
-
-std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) {
- std::string Tmp = "";
- const Type* ElemTy = Ty;
- assert(Ty->getTypeID()==TyID && "Invalid type passed");
- // Walk trought array element types.
- for (;;) {
- // Multidimensional array.
- if (ElemTy->getTypeID()==TyID) {
- if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy))
- Tmp += utostr(ATy->getNumElements());
- else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy))
- Tmp += utostr(VTy->getNumElements());
- ElemTy = cast<SequentialType>(ElemTy)->getElementType();
- }
- // Base element type found.
- if (ElemTy->getTypeID()!=TyID) break;
- Tmp += ",";
- }
- return getTypeName(ElemTy, false, true)+"["+Tmp+"]";
-}
-
-
-std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
- unsigned NumBits = 0;
- switch (Ty->getTypeID()) {
- case Type::VoidTyID:
- return "void ";
- case Type::IntegerTyID:
- NumBits = getBitWidth(Ty);
- if(NumBits==1)
- return "bool ";
- if (!isSigned)
- return "unsigned int"+utostr(NumBits)+" ";
- return "int"+utostr(NumBits)+" ";
- case Type::FloatTyID:
- return "float32 ";
- case Type::DoubleTyID:
- return "float64 ";
- default:
- errs() << "Type = " << *Ty << '\n';
- llvm_unreachable("Invalid primitive type");
- }
- return ""; // Not reached
-}
-
-
-std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
- bool isNested) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return getPrimitiveTypeName(Ty,isSigned);
- // FIXME: "OpaqueType" support
- switch (Ty->getTypeID()) {
- case Type::PointerTyID:
- return "void* ";
- case Type::StructTyID:
- if (isNested)
- return ModulePtr->getTypeName(Ty);
- return "valuetype '"+ModulePtr->getTypeName(Ty)+"' ";
- case Type::ArrayTyID:
- if (isNested)
- return getArrayTypeName(Ty->getTypeID(),Ty);
- return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
- case Type::VectorTyID:
- if (isNested)
- return getArrayTypeName(Ty->getTypeID(),Ty);
- return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
- default:
- errs() << "Type = " << *Ty << '\n';
- llvm_unreachable("Invalid type in getTypeName()");
- }
- return ""; // Not reached
-}
-
-
-MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) {
- // Function argument
- if (isa<Argument>(V))
- return ArgumentVT;
- // Function
- else if (const Function* F = dyn_cast<Function>(V))
- return F->hasLocalLinkage() ? InternalVT : GlobalVT;
- // Variable
- else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V))
- return G->hasLocalLinkage() ? InternalVT : GlobalVT;
- // Constant
- else if (isa<Constant>(V))
- return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT;
- // Local variable
- return LocalVT;
-}
-
-
-std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
- bool isSigned) {
- unsigned NumBits = 0;
- switch (Ty->getTypeID()) {
- // Integer constant, expanding for stack operations.
- case Type::IntegerTyID:
- NumBits = getBitWidth(Ty);
- // Expand integer value to "int32" or "int64".
- if (Expand) return (NumBits<=32 ? "i4" : "i8");
- if (NumBits==1) return "i1";
- return (isSigned ? "i" : "u")+utostr(NumBits/8);
- // Float constant.
- case Type::FloatTyID:
- return "r4";
- case Type::DoubleTyID:
- return "r8";
- case Type::PointerTyID:
- return "i"+utostr(TD->getTypeAllocSize(Ty));
- default:
- errs() << "TypeID = " << Ty->getTypeID() << '\n';
- llvm_unreachable("Invalid type in TypeToPostfix()");
- }
- return ""; // Not reached
-}
-
-
-void MSILWriter::printConvToPtr() {
- switch (ModulePtr->getPointerSize()) {
- case Module::Pointer32:
- printSimpleInstruction("conv.u4");
- break;
- case Module::Pointer64:
- printSimpleInstruction("conv.u8");
- break;
- default:
- llvm_unreachable("Module use not supporting pointer size");
- }
-}
-
-
-void MSILWriter::printPtrLoad(uint64_t N) {
- switch (ModulePtr->getPointerSize()) {
- case Module::Pointer32:
- printSimpleInstruction("ldc.i4",utostr(N).c_str());
- // FIXME: Need overflow test?
- if (!isUInt<32>(N)) {
- errs() << "Value = " << utostr(N) << '\n';
- llvm_unreachable("32-bit pointer overflowed");
- }
- break;
- case Module::Pointer64:
- printSimpleInstruction("ldc.i8",utostr(N).c_str());
- break;
- default:
- llvm_unreachable("Module use not supporting pointer size");
- }
-}
-
-
-void MSILWriter::printValuePtrLoad(const Value* V) {
- printValueLoad(V);
- printConvToPtr();
-}
-
-
-void MSILWriter::printConstLoad(const Constant* C) {
- if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) {
- // Integer constant
- Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t';
- if (CInt->isMinValue(true))
- Out << CInt->getSExtValue();
- else
- Out << CInt->getZExtValue();
- } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) {
- // Float constant
- uint64_t X;
- unsigned Size;
- if (FP->getType()->getTypeID()==Type::FloatTyID) {
- X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue();
- Size = 4;
- } else {
- X = FP->getValueAPF().bitcastToAPInt().getZExtValue();
- Size = 8;
- }
- Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')';
- } else if (isa<UndefValue>(C)) {
- // Undefined constant value = NULL.
- printPtrLoad(0);
- } else {
- errs() << "Constant = " << *C << '\n';
- llvm_unreachable("Invalid constant value");
- }
- Out << '\n';
-}
-
-
-void MSILWriter::printValueLoad(const Value* V) {
- MSILWriter::ValueType Location = getValueLocation(V);
- switch (Location) {
- // Global variable or function address.
- case GlobalVT:
- case InternalVT:
- if (const Function* F = dyn_cast<Function>(V)) {
- std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
- printSimpleInstruction("ldftn",
- getCallSignature(F->getFunctionType(),NULL,Name).c_str());
- } else {
- std::string Tmp;
- const Type* ElemTy = cast<PointerType>(V->getType())->getElementType();
- if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) {
- Tmp = "void* "+getValueName(V);
- printSimpleInstruction("ldsfld",Tmp.c_str());
- } else {
- Tmp = getTypeName(ElemTy)+getValueName(V);
- printSimpleInstruction("ldsflda",Tmp.c_str());
- }
- }
- break;
- // Function argument.
- case ArgumentVT:
- printSimpleInstruction("ldarg",getValueName(V).c_str());
- break;
- // Local function variable.
- case LocalVT:
- printSimpleInstruction("ldloc",getValueName(V).c_str());
- break;
- // Constant value.
- case ConstVT:
- if (isa<ConstantPointerNull>(V))
- printPtrLoad(0);
- else
- printConstLoad(cast<Constant>(V));
- break;
- // Constant expression.
- case ConstExprVT:
- printConstantExpr(cast<ConstantExpr>(V));
- break;
- default:
- errs() << "Value = " << *V << '\n';
- llvm_unreachable("Invalid value location");
- }
-}
-
-
-void MSILWriter::printValueSave(const Value* V) {
- switch (getValueLocation(V)) {
- case ArgumentVT:
- printSimpleInstruction("starg",getValueName(V).c_str());
- break;
- case LocalVT:
- printSimpleInstruction("stloc",getValueName(V).c_str());
- break;
- default:
- errs() << "Value = " << *V << '\n';
- llvm_unreachable("Invalid value location");
- }
-}
-
-
-void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left,
- const Value* Right) {
- printValueLoad(Left);
- printValueLoad(Right);
- Out << '\t' << Name << '\n';
-}
-
-
-void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) {
- if(Operand)
- Out << '\t' << Inst << '\t' << Operand << '\n';
- else
- Out << '\t' << Inst << '\n';
-}
-
-
-void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) {
- for (BasicBlock::const_iterator I = Dst->begin(); isa<PHINode>(I); ++I) {
- const PHINode* Phi = cast<PHINode>(I);
- const Value* Val = Phi->getIncomingValueForBlock(Src);
- if (isa<UndefValue>(Val)) continue;
- printValueLoad(Val);
- printValueSave(Phi);
- }
-}
-
-
-void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB,
- const BasicBlock* TrueBB,
- const BasicBlock* FalseBB) {
- if (TrueBB==FalseBB) {
- // "TrueBB" and "FalseBB" destination equals
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("pop");
- printSimpleInstruction("br",getLabelName(TrueBB).c_str());
- } else if (FalseBB==NULL) {
- // If "FalseBB" not used the jump have condition
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
- } else if (TrueBB==NULL) {
- // If "TrueBB" not used the jump is unconditional
- printPHICopy(CurrBB,FalseBB);
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- } else {
- // Copy PHI instructions for each block
- std::string TmpLabel;
- // Print PHI instructions for "TrueBB"
- if (isa<PHINode>(TrueBB->begin())) {
- TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID());
- printSimpleInstruction("brtrue",TmpLabel.c_str());
- } else {
- printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
- }
- // Print PHI instructions for "FalseBB"
- if (isa<PHINode>(FalseBB->begin())) {
- printPHICopy(CurrBB,FalseBB);
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- } else {
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- }
- if (isa<PHINode>(TrueBB->begin())) {
- // Handle "TrueBB" PHI Copy
- Out << TmpLabel << ":\n";
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("br",getLabelName(TrueBB).c_str());
- }
- }
-}
-
-
-void MSILWriter::printBranchInstruction(const BranchInst* Inst) {
- if (Inst->isUnconditional()) {
- printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0));
- } else {
- printValueLoad(Inst->getCondition());
- printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0),
- Inst->getSuccessor(1));
- }
-}
-
-
-void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue,
- const Value* VFalse) {
- std::string TmpLabel = std::string("select$true_")+utostr(getUniqID());
- printValueLoad(VTrue);
- printValueLoad(Cond);
- printSimpleInstruction("brtrue",TmpLabel.c_str());
- printSimpleInstruction("pop");
- printValueLoad(VFalse);
- Out << TmpLabel << ":\n";
-}
-
-
-void MSILWriter::printIndirectLoad(const Value* V) {
- const Type* Ty = V->getType();
- printValueLoad(V);
- if (const PointerType* P = dyn_cast<PointerType>(Ty))
- Ty = P->getElementType();
- std::string Tmp = "ldind."+getTypePostfix(Ty, false);
- printSimpleInstruction(Tmp.c_str());
-}
-
-
-void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) {
- printValueLoad(Ptr);
- printValueLoad(Val);
- printIndirectSave(Val->getType());
-}
-
-
-void MSILWriter::printIndirectSave(const Type* Ty) {
- // Instruction need signed postfix for any type.
- std::string postfix = getTypePostfix(Ty, false);
- if (*postfix.begin()=='u') *postfix.begin() = 'i';
- postfix = "stind."+postfix;
- printSimpleInstruction(postfix.c_str());
-}
-
-
-void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty, const Type* SrcTy) {
- std::string Tmp("");
- printValueLoad(V);
- switch (Op) {
- // Signed
- case Instruction::SExt:
- // If sign extending int, convert first from unsigned to signed
- // with the same bit size - because otherwise we will loose the sign.
- if (SrcTy) {
- Tmp = "conv."+getTypePostfix(SrcTy,false,true);
- printSimpleInstruction(Tmp.c_str());
- }
- // FALLTHROUGH
- case Instruction::SIToFP:
- case Instruction::FPToSI:
- Tmp = "conv."+getTypePostfix(Ty,false,true);
- printSimpleInstruction(Tmp.c_str());
- break;
- // Unsigned
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::FPToUI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- Tmp = "conv."+getTypePostfix(Ty,false);
- printSimpleInstruction(Tmp.c_str());
- break;
- // Do nothing
- case Instruction::BitCast:
- // FIXME: meaning that ld*/st* instruction do not change data format.
- break;
- default:
- errs() << "Opcode = " << Op << '\n';
- llvm_unreachable("Invalid conversion instruction");
- }
-}
-
-
-void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I,
- gep_type_iterator E) {
- unsigned Size;
- // Load address
- printValuePtrLoad(V);
- // Calculate element offset.
- for (; I!=E; ++I){
- Size = 0;
- const Value* IndexValue = I.getOperand();
- if (const StructType* StrucTy = dyn_cast<StructType>(*I)) {
- uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue();
- // Offset is the sum of all previous structure fields.
- for (uint64_t F = 0; F<FieldIndex; ++F)
- Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F));
- printPtrLoad(Size);
- printSimpleInstruction("add");
- continue;
- } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) {
- Size = TD->getTypeAllocSize(SeqTy->getElementType());
- } else {
- Size = TD->getTypeAllocSize(*I);
- }
- // Add offset of current element to stack top.
- if (!isZeroValue(IndexValue)) {
- // Constant optimization.
- if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) {
- if (C->getValue().isNegative()) {
- printPtrLoad(C->getValue().abs().getZExtValue()*Size);
- printSimpleInstruction("sub");
- continue;
- } else
- printPtrLoad(C->getZExtValue()*Size);
- } else {
- printPtrLoad(Size);
- printValuePtrLoad(IndexValue);
- printSimpleInstruction("mul");
- }
- printSimpleInstruction("add");
- }
- }
-}
-
-
-std::string MSILWriter::getCallSignature(const FunctionType* Ty,
- const Instruction* Inst,
- std::string Name) {
- std::string Tmp("");
- if (Ty->isVarArg()) Tmp += "vararg ";
- // Name and return type.
- Tmp += getTypeName(Ty->getReturnType())+Name+"(";
- // Function argument type list.
- unsigned NumParams = Ty->getNumParams();
- for (unsigned I = 0; I!=NumParams; ++I) {
- if (I!=0) Tmp += ",";
- Tmp += getTypeName(Ty->getParamType(I));
- }
- // CLR needs to know the exact amount of parameters received by vararg
- // function, because caller cleans the stack.
- if (Ty->isVarArg() && Inst) {
- // Origin to function arguments in "CallInst" or "InvokeInst".
- unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
- // Print variable argument types.
- unsigned NumOperands = Inst->getNumOperands()-Org;
- if (NumParams<NumOperands) {
- if (NumParams!=0) Tmp += ", ";
- Tmp += "... , ";
- for (unsigned J = NumParams; J!=NumOperands; ++J) {
- if (J!=NumParams) Tmp += ", ";
- Tmp += getTypeName(Inst->getOperand(J+Org)->getType());
- }
- }
- }
- return Tmp+")";
-}
-
-
-void MSILWriter::printFunctionCall(const Value* FnVal,
- const Instruction* Inst) {
- // Get function calling convention.
- std::string Name = "";
- if (const CallInst* Call = dyn_cast<CallInst>(Inst))
- Name = getConvModopt(Call->getCallingConv());
- else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
- Name = getConvModopt(Invoke->getCallingConv());
- else {
- errs() << "Instruction = " << Inst->getName() << '\n';
- llvm_unreachable("Need \"Invoke\" or \"Call\" instruction only");
- }
- if (const Function* F = dyn_cast<Function>(FnVal)) {
- // Direct call.
- Name += getValueName(F);
- printSimpleInstruction("call",
- getCallSignature(F->getFunctionType(),Inst,Name).c_str());
- } else {
- // Indirect function call.
- const PointerType* PTy = cast<PointerType>(FnVal->getType());
- const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
- // Load function address.
- printValueLoad(FnVal);
- printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
- }
-}
-
-
-void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
- std::string Name;
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::vastart:
- Name = getValueName(Inst->getArgOperand(0));
- Name.insert(Name.length()-1,"$valist");
- // Obtain the argument handle.
- printSimpleInstruction("ldloca",Name.c_str());
- printSimpleInstruction("arglist");
- printSimpleInstruction("call",
- "instance void [mscorlib]System.ArgIterator::.ctor"
- "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
- // Save as pointer type "void*"
- printValueLoad(Inst->getArgOperand(0));
- printSimpleInstruction("ldloca",Name.c_str());
- printIndirectSave(PointerType::getUnqual(
- IntegerType::get(Inst->getContext(), 8)));
- break;
- case Intrinsic::vaend:
- // Close argument list handle.
- printIndirectLoad(Inst->getArgOperand(0));
- printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
- break;
- case Intrinsic::vacopy:
- // Copy "ArgIterator" valuetype.
- printIndirectLoad(Inst->getArgOperand(0));
- printIndirectLoad(Inst->getArgOperand(1));
- printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
- break;
- default:
- errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
- llvm_unreachable("Invalid intrinsic function");
- }
-}
-
-
-void MSILWriter::printCallInstruction(const Instruction* Inst) {
- if (isa<IntrinsicInst>(Inst)) {
- // Handle intrinsic function.
- printIntrinsicCall(cast<IntrinsicInst>(Inst));
- } else {
- const CallInst *CI = cast<CallInst>(Inst);
- // Load arguments to stack and call function.
- for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I)
- printValueLoad(CI->getArgOperand(I));
- printFunctionCall(CI->getCalledFunction(), Inst);
- }
-}
-
-
-void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right) {
- switch (Predicate) {
- case ICmpInst::ICMP_EQ:
- printBinaryInstruction("ceq",Left,Right);
- break;
- case ICmpInst::ICMP_NE:
- // Emulate = not neg (Op1 eq Op2)
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("neg");
- printSimpleInstruction("not");
- break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- // Emulate = (Op1 eq Op2) or (Op1 lt Op2)
- printBinaryInstruction("ceq",Left,Right);
- if (Predicate==ICmpInst::ICMP_ULE)
- printBinaryInstruction("clt.un",Left,Right);
- else
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- break;
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- // Emulate = (Op1 eq Op2) or (Op1 gt Op2)
- printBinaryInstruction("ceq",Left,Right);
- if (Predicate==ICmpInst::ICMP_UGE)
- printBinaryInstruction("cgt.un",Left,Right);
- else
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- break;
- case ICmpInst::ICMP_ULT:
- printBinaryInstruction("clt.un",Left,Right);
- break;
- case ICmpInst::ICMP_SLT:
- printBinaryInstruction("clt",Left,Right);
- break;
- case ICmpInst::ICMP_UGT:
- printBinaryInstruction("cgt.un",Left,Right);
- break;
- case ICmpInst::ICMP_SGT:
- printBinaryInstruction("cgt",Left,Right);
- break;
- default:
- errs() << "Predicate = " << Predicate << '\n';
- llvm_unreachable("Invalid icmp predicate");
- }
-}
-
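The ICMP_NE case above leans on CIL semantics: ceq pushes 1 when the operands are equal and 0 otherwise, and the neg/not pair turns that into zero for equal operands and an all-ones (non-zero, i.e. true) value for unequal ones. A minimal C++ sketch of the same bit trick, with hypothetical operand values, to make the sequence easier to follow:

#include <cassert>
#include <cstdint>

// Mirrors the "ceq; neg; not" sequence emitted for ICMP_NE.
int32_t emulateIcmpNe(int32_t a, int32_t b) {
  int32_t eq = (a == b) ? 1 : 0; // ceq: 1 if equal, 0 otherwise
  int32_t negated = -eq;         // neg: 0 -> 0, 1 -> -1
  return ~negated;               // not: 0 -> all ones (true), -1 -> 0 (false)
}

int main() {
  assert(emulateIcmpNe(3, 3) == 0);  // equal operands compare as "false"
  assert(emulateIcmpNe(3, 4) != 0);  // unequal operands compare as "true"
  return 0;
}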
-
-void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right) {
- // FIXME: Correct comparison
- std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)";
- switch (Predicate) {
- case FCmpInst::FCMP_UGT:
- // X > Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("cgt",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OGT:
- // X > Y
- printBinaryInstruction("cgt",Left,Right);
- break;
- case FCmpInst::FCMP_UGE:
- // X >= Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OGE:
- // X >= Y
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_ULT:
- // X < Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("clt",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OLT:
- // X < Y
- printBinaryInstruction("clt",Left,Right);
- break;
- case FCmpInst::FCMP_ULE:
- // X <= Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OLE:
- // X <= Y
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_UEQ:
- // X == Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OEQ:
- // X == Y
- printBinaryInstruction("ceq",Left,Right);
- break;
- case FCmpInst::FCMP_UNE:
- // X != Y
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("neg");
- printSimpleInstruction("not");
- break;
- case FCmpInst::FCMP_ONE:
- // X != Y && llvm_fcmp_ord(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("not");
- break;
- case FCmpInst::FCMP_ORD:
- // return X == X && Y == Y
- printBinaryInstruction("ceq",Left,Left);
- printBinaryInstruction("ceq",Right,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_UNO:
- // X != X || Y != Y
- printBinaryInstruction("ceq",Left,Left);
- printSimpleInstruction("not");
- printBinaryInstruction("ceq",Right,Right);
- printSimpleInstruction("not");
- printSimpleInstruction("or");
- break;
- default:
- llvm_unreachable("Illegal FCmp predicate");
- }
-}
-
-
-void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
- std::string Label = "leave$normal_"+utostr(getUniqID());
- Out << ".try {\n";
- // Load arguments
- for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I)
- printValueLoad(Inst->getArgOperand(I));
- // Print call instruction
- printFunctionCall(Inst->getOperand(0),Inst);
- // Save function result and leave "try" block
- printValueSave(Inst);
- printSimpleInstruction("leave",Label.c_str());
- Out << "}\n";
- Out << "catch [mscorlib]System.Exception {\n";
- // Redirect to unwind block
- printSimpleInstruction("pop");
- printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest());
- Out << "}\n" << Label << ":\n";
- // Redirect to continue block
- printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest());
-}
-
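The invoke lowering above wraps the call in a .try region, catches System.Exception to divert control to the unwind successor, and uses leave to reach the normal successor. A loose C++ analogue of that control flow, using a hypothetical callee, to illustrate the shape of the emitted code:

#include <cstdio>
#include <exception>

int hypotheticalCallee() { return 42; } // stand-in for the invoked function

void invokeShape() {
  int result;
  try {                                  // ".try {"
    result = hypotheticalCallee();       // call and save the result
  } catch (const std::exception &) {     // "catch [mscorlib]System.Exception {"
    std::puts("unwind destination");     // branch to the unwind block
    return;
  }
  std::printf("normal destination: %d\n", result); // reached via "leave"
}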
-
-void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) {
- // FIXME: Emulate with IL "switch" instruction
- // Emulate = if () else if () else if () else ...
- for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) {
- printValueLoad(Inst->getCondition());
- printValueLoad(Inst->getCaseValue(I));
- printSimpleInstruction("ceq");
- // Condition jump to successor block
- printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL);
- }
- // Jump to default block
- printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest());
-}
-
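As the FIXME notes, the writer does not use CIL's native switch opcode yet; each case value gets its own equality test and conditional branch, with the default destination taken last. A small C++ model of that if/else-if chain, with hypothetical case and block types:

#include <cstdint>
#include <vector>

struct SwitchCase { int64_t value; int targetBlock; };

// Models the compare-and-branch chain emitted for each case,
// ending with an unconditional jump to the default block.
int lowerSwitchAsChain(int64_t cond, const std::vector<SwitchCase> &cases,
                       int defaultBlock) {
  for (const SwitchCase &c : cases) {
    if (cond == c.value)   // ceq + conditional branch to the successor
      return c.targetBlock;
  }
  return defaultBlock;     // br to the default destination
}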
-
-void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
- printIndirectLoad(Inst->getOperand(0));
- printSimpleInstruction("call",
- "instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
- printSimpleInstruction("refanyval","void*");
- std::string Name =
- "ldind."+getTypePostfix(PointerType::getUnqual(
- IntegerType::get(Inst->getContext(), 8)),false);
- printSimpleInstruction(Name.c_str());
-}
-
-
-void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) {
- uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType());
- // Constant optimization.
- if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
- printPtrLoad(CInt->getZExtValue()*Size);
- } else {
- printPtrLoad(Size);
- printValueLoad(Inst->getOperand(0));
- printSimpleInstruction("mul");
- }
- printSimpleInstruction("localloc");
-}
-
-
-void MSILWriter::printInstruction(const Instruction* Inst) {
- const Value *Left = 0, *Right = 0;
- if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0);
- if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1);
- // Print instruction
- // FIXME: "ShuffleVector","ExtractElement","InsertElement" support.
- switch (Inst->getOpcode()) {
- // Terminator
- case Instruction::Ret:
- if (Inst->getNumOperands()) {
- printValueLoad(Left);
- printSimpleInstruction("ret");
- } else
- printSimpleInstruction("ret");
- break;
- case Instruction::Br:
- printBranchInstruction(cast<BranchInst>(Inst));
- break;
- // Binary
- case Instruction::Add:
- case Instruction::FAdd:
- printBinaryInstruction("add",Left,Right);
- break;
- case Instruction::Sub:
- case Instruction::FSub:
- printBinaryInstruction("sub",Left,Right);
- break;
- case Instruction::Mul:
- case Instruction::FMul:
- printBinaryInstruction("mul",Left,Right);
- break;
- case Instruction::UDiv:
- printBinaryInstruction("div.un",Left,Right);
- break;
- case Instruction::SDiv:
- case Instruction::FDiv:
- printBinaryInstruction("div",Left,Right);
- break;
- case Instruction::URem:
- printBinaryInstruction("rem.un",Left,Right);
- break;
- case Instruction::SRem:
- case Instruction::FRem:
- printBinaryInstruction("rem",Left,Right);
- break;
- // Binary Condition
- case Instruction::ICmp:
- printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right);
- break;
- case Instruction::FCmp:
- printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right);
- break;
- // Bitwise Binary
- case Instruction::And:
- printBinaryInstruction("and",Left,Right);
- break;
- case Instruction::Or:
- printBinaryInstruction("or",Left,Right);
- break;
- case Instruction::Xor:
- printBinaryInstruction("xor",Left,Right);
- break;
- case Instruction::Shl:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shl");
- break;
- case Instruction::LShr:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shr.un");
- break;
- case Instruction::AShr:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shr");
- break;
- case Instruction::Select:
- printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2));
- break;
- case Instruction::Load:
- printIndirectLoad(Inst->getOperand(0));
- break;
- case Instruction::Store:
- printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
- break;
- case Instruction::SExt:
- printCastInstruction(Inst->getOpcode(),Left,
- cast<CastInst>(Inst)->getDestTy(),
- cast<CastInst>(Inst)->getSrcTy());
- break;
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- printCastInstruction(Inst->getOpcode(),Left,
- cast<CastInst>(Inst)->getDestTy());
- break;
- case Instruction::GetElementPtr:
- printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst),
- gep_type_end(Inst));
- break;
- case Instruction::Call:
- printCallInstruction(cast<CallInst>(Inst));
- break;
- case Instruction::Invoke:
- printInvokeInstruction(cast<InvokeInst>(Inst));
- break;
- case Instruction::Unwind:
- printSimpleInstruction("newobj",
- "instance void [mscorlib]System.Exception::.ctor()");
- printSimpleInstruction("throw");
- break;
- case Instruction::Switch:
- printSwitchInstruction(cast<SwitchInst>(Inst));
- break;
- case Instruction::Alloca:
- printAllocaInstruction(cast<AllocaInst>(Inst));
- break;
- case Instruction::Unreachable:
- printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
- printSimpleInstruction("newobj",
- "instance void [mscorlib]System.Exception::.ctor(string)");
- printSimpleInstruction("throw");
- break;
- case Instruction::VAArg:
- printVAArgInstruction(cast<VAArgInst>(Inst));
- break;
- default:
- errs() << "Instruction = " << Inst->getName() << '\n';
- llvm_unreachable("Unsupported instruction");
- }
-}
-
-
-void MSILWriter::printLoop(const Loop* L) {
- Out << getLabelName(L->getHeader()->getName()) << ":\n";
- const std::vector<BasicBlock*>& blocks = L->getBlocks();
- for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
- BasicBlock* BB = blocks[I];
- Loop* BBLoop = LInfo->getLoopFor(BB);
- if (BBLoop == L)
- printBasicBlock(BB);
- else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
- printLoop(BBLoop);
- }
- printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
-}
-
-
-void MSILWriter::printBasicBlock(const BasicBlock* BB) {
- Out << getLabelName(BB) << ":\n";
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
- const Instruction* Inst = I;
-    // Optionally emit the original LLVM instruction as a comment:
- // Out << "\n//" << *Inst << "\n";
- // Do not handle PHI instruction in current block
- if (Inst->getOpcode()==Instruction::PHI) continue;
- // Print instruction
- printInstruction(Inst);
- // Save result
- if (Inst->getType()!=Type::getVoidTy(BB->getContext())) {
-      // Do not save the value after an invoke; that is done in the "try" block
- if (Inst->getOpcode()==Instruction::Invoke) continue;
- printValueSave(Inst);
- }
- }
-}
-
-
-void MSILWriter::printLocalVariables(const Function& F) {
- std::string Name;
- const Type* Ty = NULL;
- std::set<const Value*> Printed;
- const Value* VaList = NULL;
- unsigned StackDepth = 8;
- // Find local variables
- for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
- if (I->getOpcode()==Instruction::Call ||
- I->getOpcode()==Instruction::Invoke) {
- // Test stack depth.
- if (StackDepth<I->getNumOperands())
- StackDepth = I->getNumOperands();
- }
- const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
- if (AI && !isa<GlobalVariable>(AI)) {
- // Local variable allocation.
- Ty = PointerType::getUnqual(AI->getAllocatedType());
- Name = getValueName(AI);
- Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- } else if (I->getType()!=Type::getVoidTy(F.getContext())) {
- // Operation result.
- Ty = I->getType();
- Name = getValueName(&*I);
- Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- }
- // Test on 'va_list' variable
- bool isVaList = false;
- if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
- // "va_list" as "va_arg" instruction operand.
- isVaList = true;
- VaList = VaInst->getOperand(0);
- } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
- // "va_list" as intrinsic function operand.
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::vastart:
- case Intrinsic::vaend:
- case Intrinsic::vacopy:
- isVaList = true;
- VaList = Inst->getArgOperand(0);
- break;
- default:
- isVaList = false;
- }
- }
- // Print "va_list" variable.
- if (isVaList && Printed.insert(VaList).second) {
- Name = getValueName(VaList);
- Name.insert(Name.length()-1,"$valist");
- Out << "\t.locals (valuetype [mscorlib]System.ArgIterator "
- << Name << ")\n";
- }
- }
- printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str());
-}
-
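The .maxstack value above is a conservative estimate rather than an exact computation: it starts at 8, is widened to the largest operand count seen on any call or invoke, and is then doubled. A tiny C++ sketch of that estimate, taking hypothetical per-call operand counts as input:

#include <cassert>
#include <vector>

// Start from a floor of 8, widen to the widest call, then double.
unsigned estimateMaxStack(const std::vector<unsigned> &callOperandCounts) {
  unsigned depth = 8;
  for (unsigned n : callOperandCounts)
    if (n > depth)
      depth = n;
  return depth * 2;
}

int main() {
  assert(estimateMaxStack({}) == 16);      // no calls: floor of 8, doubled
  assert(estimateMaxStack({3, 12}) == 24); // widest call has 12 operands
  return 0;
}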
-
-void MSILWriter::printFunctionBody(const Function& F) {
- // Print body
- for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) {
- if (Loop *L = LInfo->getLoopFor(I)) {
- if (L->getHeader()==I && L->getParentLoop()==0)
- printLoop(L);
- } else {
- printBasicBlock(I);
- }
- }
-}
-
-
-void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
- const Value *left = 0, *right = 0;
- if (CE->getNumOperands()>=1) left = CE->getOperand(0);
- if (CE->getNumOperands()>=2) right = CE->getOperand(1);
- // Print instruction
- switch (CE->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- printCastInstruction(CE->getOpcode(),left,CE->getType());
- break;
- case Instruction::GetElementPtr:
- printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE));
- break;
- case Instruction::ICmp:
- printICmpInstruction(CE->getPredicate(),left,right);
- break;
- case Instruction::FCmp:
- printFCmpInstruction(CE->getPredicate(),left,right);
- break;
- case Instruction::Select:
- printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
- break;
- case Instruction::Add:
- case Instruction::FAdd:
- printBinaryInstruction("add",left,right);
- break;
- case Instruction::Sub:
- case Instruction::FSub:
- printBinaryInstruction("sub",left,right);
- break;
- case Instruction::Mul:
- case Instruction::FMul:
- printBinaryInstruction("mul",left,right);
- break;
- case Instruction::UDiv:
- printBinaryInstruction("div.un",left,right);
- break;
- case Instruction::SDiv:
- case Instruction::FDiv:
- printBinaryInstruction("div",left,right);
- break;
- case Instruction::URem:
- printBinaryInstruction("rem.un",left,right);
- break;
- case Instruction::SRem:
- case Instruction::FRem:
- printBinaryInstruction("rem",left,right);
- break;
- case Instruction::And:
- printBinaryInstruction("and",left,right);
- break;
- case Instruction::Or:
- printBinaryInstruction("or",left,right);
- break;
- case Instruction::Xor:
- printBinaryInstruction("xor",left,right);
- break;
- case Instruction::Shl:
- printBinaryInstruction("shl",left,right);
- break;
- case Instruction::LShr:
- printBinaryInstruction("shr.un",left,right);
- break;
- case Instruction::AShr:
- printBinaryInstruction("shr",left,right);
- break;
- default:
- errs() << "Expression = " << *CE << "\n";
- llvm_unreachable("Invalid constant expression");
- }
-}
-
-
-void MSILWriter::printStaticInitializerList() {
- // List of global variables with uninitialized fields.
- for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator
- VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE;
- ++VarI) {
- const std::vector<StaticInitializer>& InitList = VarI->second;
- if (InitList.empty()) continue;
- // For each uninitialized field.
- for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(),
- E = InitList.end(); I!=E; ++I) {
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) {
- // Out << "\n// Init " << getValueName(VarI->first) << ", offset " <<
- // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n";
- // Load variable address
- printValueLoad(VarI->first);
- // Add offset
- if (I->offset!=0) {
- printPtrLoad(I->offset);
- printSimpleInstruction("add");
- }
- // Load value
- printConstantExpr(CE);
- // Save result at offset
- std::string postfix = getTypePostfix(CE->getType(),true);
- if (*postfix.begin()=='u') *postfix.begin() = 'i';
- postfix = "stind."+postfix;
- printSimpleInstruction(postfix.c_str());
- } else {
- errs() << "Constant = " << *I->constant << '\n';
- llvm_unreachable("Invalid static initializer");
- }
- }
- }
-}
-
-
-void MSILWriter::printFunction(const Function& F) {
- bool isSigned = F.paramHasAttr(0, Attribute::SExt);
- Out << "\n.method static ";
- Out << (F.hasLocalLinkage() ? "private " : "public ");
- if (F.isVarArg()) Out << "vararg ";
- Out << getTypeName(F.getReturnType(),isSigned) <<
- getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n';
- // Arguments
- Out << "\t(";
- unsigned ArgIdx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E;
- ++I, ++ArgIdx) {
- isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt);
- if (I!=F.arg_begin()) Out << ", ";
- Out << getTypeName(I->getType(),isSigned) << getValueName(I);
- }
- Out << ") cil managed\n";
- // Body
- Out << "{\n";
- printLocalVariables(F);
- printFunctionBody(F);
- Out << "}\n";
-}
-
-
-void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
- std::string Name;
- std::set<const Type*> Printed;
- for (std::set<const Type*>::const_iterator
- UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
- const Type* Ty = *UI;
- if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy())
- Name = getTypeName(Ty, false, true);
- // Type with no need to declare.
- else continue;
- // Print not duplicated type
- if (Printed.insert(Ty).second) {
- Out << ".class value explicit ansi sealed '" << Name << "'";
- Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty);
- Out << " }\n\n";
- }
- }
-}
-
-
-unsigned int MSILWriter::getBitWidth(const Type* Ty) {
- unsigned int N = Ty->getPrimitiveSizeInBits();
- assert(N!=0 && "Invalid type in getBitWidth()");
- switch (N) {
- case 1:
- case 8:
- case 16:
- case 32:
- case 64:
- return N;
- default:
- errs() << "Bits = " << N << '\n';
- llvm_unreachable("Unsupported integer width");
- }
- return 0; // Not reached
-}
-
-
-void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
- uint64_t TySize = 0;
- const Type* Ty = C->getType();
- // Print zero initialized constant.
- if (isa<ConstantAggregateZero>(C) || C->isNullValue()) {
- TySize = TD->getTypeAllocSize(C->getType());
- Offset += TySize;
- Out << "int8 (0) [" << TySize << "]";
- return;
- }
- // Print constant initializer
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- TySize = TD->getTypeAllocSize(Ty);
- const ConstantInt* Int = cast<ConstantInt>(C);
- Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")";
- break;
- }
- case Type::FloatTyID:
- case Type::DoubleTyID: {
- TySize = TD->getTypeAllocSize(Ty);
- const ConstantFP* FP = cast<ConstantFP>(C);
- if (Ty->getTypeID() == Type::FloatTyID)
- Out << "int32 (" <<
- (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
- else
- Out << "int64 (" <<
- FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
- break;
- }
- case Type::ArrayTyID:
- case Type::VectorTyID:
- case Type::StructTyID:
- for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
- if (I!=0) Out << ",\n";
- printStaticConstant(cast<Constant>(C->getOperand(I)), Offset);
- }
- break;
- case Type::PointerTyID:
- TySize = TD->getTypeAllocSize(C->getType());
- // Initialize with global variable address
- if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
- std::string name = getValueName(G);
- Out << "&(" << name.insert(name.length()-1,"$data") << ")";
- } else {
- // Dynamic initialization
- if (!isa<ConstantPointerNull>(C) && !C->isNullValue())
- InitListPtr->push_back(StaticInitializer(C,Offset));
- // Null pointer initialization
- if (TySize==4) Out << "int32 (0)";
- else if (TySize==8) Out << "int64 (0)";
- else llvm_unreachable("Invalid pointer size");
- }
- break;
- default:
- errs() << "TypeID = " << Ty->getTypeID() << '\n';
- llvm_unreachable("Invalid type in printStaticConstant()");
- }
- // Increase offset.
- Offset += TySize;
-}
-
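Floating-point initializers above are written out as their raw IEEE-754 bit patterns, int32 for float and int64 for double, which is what the bitcastToAPInt() calls produce. A short stand-alone C++ sketch of the same bit-cast, assuming a memcpy-based helper:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret a float's storage as a 32-bit integer; a double works
// the same way with uint64_t.
static uint32_t floatBits(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}

int main() {
  // Prints the pattern that would appear inside "int32 (...)".
  std::printf("int32 (%u)\n", floatBits(1.0f));
  return 0;
}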
-
-void MSILWriter::printStaticInitializer(const Constant* C,
- const std::string& Name) {
- switch (C->getType()->getTypeID()) {
- case Type::IntegerTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- Out << getPrimitiveTypeName(C->getType(), false);
- break;
- case Type::ArrayTyID:
- case Type::VectorTyID:
- case Type::StructTyID:
- case Type::PointerTyID:
- Out << getTypeName(C->getType());
- break;
- default:
- errs() << "Type = " << *C << "\n";
- llvm_unreachable("Invalid constant type");
- }
- // Print initializer
- std::string label = Name;
- label.insert(label.length()-1,"$data");
- Out << Name << " at " << label << '\n';
- Out << ".data " << label << " = {\n";
- uint64_t offset = 0;
- printStaticConstant(C,offset);
- Out << "\n}\n\n";
-}
-
-
-void MSILWriter::printVariableDefinition(const GlobalVariable* G) {
- const Constant* C = G->getInitializer();
- if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
- InitListPtr = 0;
- else
- InitListPtr = &StaticInitList[G];
- printStaticInitializer(C,getValueName(G));
-}
-
-
-void MSILWriter::printGlobalVariables() {
- if (ModulePtr->global_empty()) return;
- Module::global_iterator I,E;
- for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) {
- // Variable definition
- Out << ".field static " << (I->isDeclaration() ? "public " :
- "private ");
- if (I->isDeclaration()) {
- Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n";
- } else
- printVariableDefinition(&*I);
- }
-}
-
-
-const char* MSILWriter::getLibraryName(const Function* F) {
- return getLibraryForSymbol(F->getName(), true, F->getCallingConv());
-}
-
-
-const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
- return getLibraryForSymbol(GV->getName(), false, CallingConv::C);
-}
-
-
-const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction,
- CallingConv::ID CallingConv) {
-  // TODO: Read a *.def file with function and library definitions.
- return "MSVCRT.DLL";
-}
-
-
-void MSILWriter::printExternals() {
- Module::const_iterator I,E;
- // Functions.
- for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) {
-    // Skip intrinsics
- if (I->isIntrinsic()) continue;
- if (I->isDeclaration()) {
- const Function* F = I;
- std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
- std::string Sig =
- getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name);
- Out << ".method static hidebysig pinvokeimpl(\""
- << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n";
- }
- }
- // External variables and static initialization.
- Out <<
- ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
- " native int LoadLibrary(string) preservesig {}\n"
- ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
- " native int GetProcAddress(native int, string) preservesig {}\n";
- Out <<
- ".method private static void* $MSIL_Import(string lib,string sym)\n"
- " managed cil\n{\n"
- "\tldarg\tlib\n"
- "\tcall\tnative int LoadLibrary(string)\n"
- "\tldarg\tsym\n"
- "\tcall\tnative int GetProcAddress(native int,string)\n"
- "\tdup\n"
- "\tbrtrue\tL_01\n"
-    "\tldstr\t\"Cannot import variable\"\n"
- "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n"
- "\tthrow\n"
- "L_01:\n"
- "\tret\n"
- "}\n\n"
- ".method static private void $MSIL_Init() managed cil\n{\n";
- printStaticInitializerList();
- // Foreach global variable.
- for (Module::global_iterator I = ModulePtr->global_begin(),
- E = ModulePtr->global_end(); I!=E; ++I) {
- if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
-    // Use "LoadLibrary"/"GetProcAddress" to receive the variable address.
- std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
- printSimpleInstruction("ldsflda",Tmp.c_str());
- Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
- Out << "\tldstr\t\"" << I->getName() << "\"\n";
- printSimpleInstruction("call","void* $MSIL_Import(string,string)");
- printIndirectSave(I->getType());
- }
- printSimpleInstruction("ret");
- Out << "}\n\n";
-}
-
-
-//===----------------------------------------------------------------------===//
-// External Interface declaration
-//===----------------------------------------------------------------------===//
-
-bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify)
-{
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
- MSILWriter* Writer = new MSILWriter(o);
- PM.add(createGCLoweringPass());
- // FIXME: Handle switch through native IL instruction "switch"
- PM.add(createLowerSwitchPass());
- PM.add(createCFGSimplificationPass());
- PM.add(new MSILModule(Writer->UsedTypes,Writer->TD));
- PM.add(Writer);
- PM.add(createGCInfoDeleter());
- return false;
-}
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
deleted file mode 100644
index 92a3abe5c0a7..000000000000
--- a/lib/Target/MSIL/MSILWriter.h
+++ /dev/null
@@ -1,258 +0,0 @@
-//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the MSILWriter that is used by the MSIL backend.
-//
-//===----------------------------------------------------------------------===//
-#ifndef MSILWRITER_H
-#define MSILWRITER_H
-
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- extern Target TheMSILTarget;
-
- class MSILModule : public ModulePass {
- Module *ModulePtr;
- const std::set<const Type *>*& UsedTypes;
- const TargetData*& TD;
-
- public:
- static char ID;
- MSILModule(const std::set<const Type *>*& _UsedTypes,
- const TargetData*& _TD)
- : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<FindUsedTypes>();
- AU.addRequired<TargetData>();
- }
-
- virtual const char *getPassName() const {
- return "MSIL backend definitions";
- }
-
- virtual bool runOnModule(Module &M);
-
- };
-
- class MSILWriter : public FunctionPass {
- struct StaticInitializer {
- const Constant* constant;
- uint64_t offset;
-
- StaticInitializer()
- : constant(0), offset(0) {}
-
- StaticInitializer(const Constant* _constant, uint64_t _offset)
- : constant(_constant), offset(_offset) {}
- };
-
- uint64_t UniqID;
-
- uint64_t getUniqID() {
- return ++UniqID;
- }
-
- public:
- formatted_raw_ostream &Out;
- Module* ModulePtr;
- const TargetData* TD;
- LoopInfo *LInfo;
- std::vector<StaticInitializer>* InitListPtr;
- std::map<const GlobalVariable*,std::vector<StaticInitializer> >
- StaticInitList;
- const std::set<const Type *>* UsedTypes;
- static char ID;
- DenseMap<const Value*, unsigned> AnonValueNumbers;
- unsigned NextAnonValueNumber;
-
- MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o),
- NextAnonValueNumber(0) {
- UniqID = 0;
- }
-
- enum ValueType {
- UndefVT,
- GlobalVT,
- InternalVT,
- ArgumentVT,
- LocalVT,
- ConstVT,
- ConstExprVT
- };
-
- bool isVariable(ValueType V) {
- return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
- }
-
- bool isConstValue(ValueType V) {
- return V==ConstVT || V==ConstExprVT;
- }
-
- virtual const char *getPassName() const { return "MSIL backend"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
- }
-
- bool runOnFunction(Function &F);
-
- virtual bool doInitialization(Module &M);
-
- virtual bool doFinalization(Module &M);
-
- void printModuleStartup();
-
- bool isZeroValue(const Value* V);
-
- std::string getValueName(const Value* V);
-
- std::string getLabelName(const Value* V);
-
- std::string getLabelName(const std::string& Name);
-
- std::string getConvModopt(CallingConv::ID CallingConvID);
-
- std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
-
- std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
-
- std::string getFunctionTypeName(const Type* Ty);
-
- std::string getPointerTypeName(const Type* Ty);
-
- std::string getTypeName(const Type* Ty, bool isSigned = false,
- bool isNested = false);
-
- ValueType getValueLocation(const Value* V);
-
- std::string getTypePostfix(const Type* Ty, bool Expand,
- bool isSigned = false);
-
- void printConvToPtr();
-
- void printPtrLoad(uint64_t N);
-
- void printValuePtrLoad(const Value* V);
-
- void printConstLoad(const Constant* C);
-
- void printValueLoad(const Value* V);
-
- void printValueSave(const Value* V);
-
- void printBinaryInstruction(const char* Name, const Value* Left,
- const Value* Right);
-
- void printSimpleInstruction(const char* Inst, const char* Operand = NULL);
-
- void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst);
-
- void printBranchToBlock(const BasicBlock* CurrBB,
- const BasicBlock* TrueBB,
- const BasicBlock* FalseBB);
-
- void printBranchInstruction(const BranchInst* Inst);
-
- void printSelectInstruction(const Value* Cond, const Value* VTrue,
- const Value* VFalse);
-
- void printIndirectLoad(const Value* V);
-
- void printIndirectSave(const Value* Ptr, const Value* Val);
-
- void printIndirectSave(const Type* Ty);
-
- void printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty, const Type* SrcTy=0);
-
- void printGepInstruction(const Value* V, gep_type_iterator I,
- gep_type_iterator E);
-
- std::string getCallSignature(const FunctionType* Ty,
- const Instruction* Inst,
- std::string Name);
-
- void printFunctionCall(const Value* FnVal, const Instruction* Inst);
-
- void printIntrinsicCall(const IntrinsicInst* Inst);
-
- void printCallInstruction(const Instruction* Inst);
-
- void printICmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right);
-
- void printFCmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right);
-
- void printInvokeInstruction(const InvokeInst* Inst);
-
- void printSwitchInstruction(const SwitchInst* Inst);
-
- void printVAArgInstruction(const VAArgInst* Inst);
-
- void printAllocaInstruction(const AllocaInst* Inst);
-
- void printInstruction(const Instruction* Inst);
-
- void printLoop(const Loop* L);
-
- void printBasicBlock(const BasicBlock* BB);
-
- void printLocalVariables(const Function& F);
-
- void printFunctionBody(const Function& F);
-
- void printConstantExpr(const ConstantExpr* CE);
-
- void printStaticInitializerList();
-
- void printFunction(const Function& F);
-
- void printDeclarations(const TypeSymbolTable& ST);
-
- unsigned int getBitWidth(const Type* Ty);
-
- void printStaticConstant(const Constant* C, uint64_t& Offset);
-
- void printStaticInitializer(const Constant* C, const std::string& Name);
-
- void printVariableDefinition(const GlobalVariable* G);
-
- void printGlobalVariables();
-
- const char* getLibraryName(const Function* F);
-
- const char* getLibraryName(const GlobalVariable* GV);
-
- const char* getLibraryForSymbol(StringRef Name, bool isFunction,
- CallingConv::ID CallingConv);
-
- void printExternals();
- };
-
-}
-
-#endif
-
diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile
deleted file mode 100644
index 70eadb32e360..000000000000
--- a/lib/Target/MSIL/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMMSIL
-DIRS = TargetInfo
-
-include $(LEVEL)/Makefile.common
-
-CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT
deleted file mode 100644
index d797c71fd39f..000000000000
--- a/lib/Target/MSIL/README.TXT
+++ /dev/null
@@ -1,26 +0,0 @@
-//===---------------------------------------------------------------------===//
-
-Vector instructions support.
-
-ShuffleVector
-ExtractElement
-InsertElement
-
-//===---------------------------------------------------------------------===//
-
-Add "OpaqueType" type.
-
-//===---------------------------------------------------------------------===//
-
-"switch" instruction emulation with CLI "switch" instruction.
-
-//===---------------------------------------------------------------------===//
-
-Write a linker for external functions, because exporting a function requires
-knowing which dynamic library the function is located in.
-
-.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl)
- void free(void*) preservesig {}
-
-
-
diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 9f0c3a09341a..000000000000
--- a/lib/Target/MSIL/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMSILInfo
- MSILTargetInfo.cpp
- )
-
diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
deleted file mode 100644
index dfd42814e51c..000000000000
--- a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSILWriter.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheMSILTarget;
-
-static unsigned MSIL_TripleMatchQuality(const std::string &TT) {
- // This class always works, but shouldn't be the default in most cases.
- return 1;
-}
-
-extern "C" void LLVMInitializeMSILTargetInfo() {
- TargetRegistry::RegisterTarget(TheMSILTarget, "msil",
- "MSIL backend",
- &MSIL_TripleMatchQuality);
-}
diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile
deleted file mode 100644
index 30b0950db0f7..000000000000
--- a/lib/Target/MSIL/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSILInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 68cb342b08f4..bd644435c76f 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -10,7 +10,7 @@
// This file contains a pass that scans a machine function to determine which
// conditional branches need more than 10 bits of displacement to reach their
// target basic block. It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass. This
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
// pass should be run last, just before the assembly printer.
//
//===----------------------------------------------------------------------===//
@@ -30,7 +30,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
struct MSP430BSel : public MachineFunctionPass {
static char ID;
- MSP430BSel() : MachineFunctionPass(&ID) {}
+ MSP430BSel() : MachineFunctionPass(ID) {}
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -52,7 +52,8 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() {
}
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index df28d07f5d71..bfab844f5b1a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -100,27 +100,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
bool
-MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers yet.
-
- switch (MI.getOpcode()) {
- default:
- return false;
- case MSP430::MOV8rr:
- case MSP430::MOV16rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
-}
-
-bool
MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -361,7 +340,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (Desc.getOpcode()) {
default:
assert(0 && "Unknown instruction size!");
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index ebbda1aeef51..49ccc032bf29 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -54,10 +54,6 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 608ca49fcf78..3c3fa73477a5 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -101,7 +101,7 @@ bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
MFI->isFrameAddressTaken());
}
-bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -163,10 +163,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -204,7 +203,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(BasePtr, false);
if (Offset == 0)
- return 0;
+ return;
// We need to materialize the offset via add instruction.
unsigned DstReg = MI.getOperand(0).getReg();
@@ -215,12 +214,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
.addReg(DstReg).addImm(Offset);
- return 0;
+ return;
}
MI.getOperand(i).ChangeToRegister(BasePtr, false);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
}
void
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 6e58d3116d27..4d2795bb4020 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -40,15 +40,14 @@ public:
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 2037a9114559..49efe75d79d8 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -180,7 +180,8 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+ GV->hasLinkerPrivateWeakDefAutoLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 8ae05b75e919..6660f6b62430 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -18,6 +18,8 @@
#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -75,6 +77,7 @@ namespace {
}
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
static const char *getRegisterName(unsigned RegNo);
virtual void EmitFunctionEntryLabel();
@@ -227,6 +230,23 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
}
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const {
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index aa036aef83d0..a51c3779c7f4 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,4 +1,4 @@
-//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index c2bfb8fa738c..8f313efaf8da 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,4 +1,4 @@
-//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index a2b615d8add2..597ea0d6c207 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "Mips Delay Slot Filler";
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 3888bbf09ec7..a47cf7b4f201 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -137,7 +137,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_immSExt16(CN)) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
@@ -184,8 +184,9 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
if (!Subtarget.isMips1() || NVT != MVT::f64)
return NULL;
- if (!Predicate_unindexedload(N) ||
- !Predicate_load(N))
+ LoadSDNode *LN = cast<LoadSDNode>(N);
+ if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
+ LN->getAddressingMode() != ISD::UNINDEXED)
return NULL;
SDValue Chain = N->getOperand(0);
@@ -248,8 +249,8 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
SDValue Chain = N->getOperand(0);
- if (!Predicate_unindexedstore(N) ||
- !Predicate_store(N))
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
return NULL;
SDValue N1 = N->getOperand(1);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b6ff2c371d5c..b0b99bad1607 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -317,13 +317,13 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(sinkMBB);
// sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl,
TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -542,7 +542,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
- if (IsPIC) {
+ if (!IsPIC) {
SDValue Ops[] = { JTI };
HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
} else // Emit Load from Global Pointer
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index e948917eb80e..cff79966dcd3 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===//
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 0853272f7280..98ae2fa7da45 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 6c09a3e10785..aaf307b1ce3f 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -30,53 +30,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool MipsInstrInfo::
-isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const
-{
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- // addu $dst, $src, $zero || addu $dst, $zero, $src
- // or $dst, $src, $zero || or $dst, $zero, $src
- if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) {
- if (MI.getOperand(1).getReg() == Mips::ZERO) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).getReg() == Mips::ZERO) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- // mov $fpDst, $fpSrc
- // mfc $gpDst, $fpSrc
- // mtc $fpDst, $gpSrc
- if (MI.getOpcode() == Mips::FMOV_S32 ||
- MI.getOpcode() == Mips::FMOV_D32 ||
- MI.getOpcode() == Mips::MFC1 ||
- MI.getOpcode() == Mips::MTC1 ||
- MI.getOpcode() == Mips::MOVCCRToCCR) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-
- // addiu $dst, $src, 0
- if (MI.getOpcode() == Mips::ADDiu) {
- if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index d6f87f9b0ce8..52a3d39840ba 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -174,12 +174,6 @@ public:
///
virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5337c9fb816a..320c5b883483 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.td - Mips Register defs --------------------*- C++ -*-===//
+//===- MipsInstrInfo.td - Mips Register defs ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -96,12 +96,7 @@ def HI16 : SDNodeXForm<imm, [{
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
-def immSExt16 : PatLeaf<(imm), [{
- if (N->getValueType(0) == MVT::i32)
- return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
- else
- return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
-}]>;
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
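The rewritten immSExt16 predicate above defers to LLVM's isInt<16>(), which just asks whether the sign-extended value fits in a signed 16-bit range. A stand-alone C++ equivalent of that check, for illustration only:

#include <cassert>
#include <cstdint>

// Same condition isInt<16>(x) tests: x must lie in [-32768, 32767].
static bool fitsInSigned16(int64_t x) {
  return x >= INT16_MIN && x <= INT16_MAX;
}

int main() {
  assert(fitsInSigned16(-32768));
  assert(fitsInSigned16(32767));
  assert(!fitsInSigned16(65535));  // fits as unsigned 16-bit, not signed
  return 0;
}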
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index e15f0a58e501..69436d2acb54 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -327,10 +327,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a direct stack/frame pointer
// reference.
-unsigned MipsRegisterInfo::
+void MipsRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const
-{
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -361,7 +360,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MI.getOperand(i-1).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
- return 0;
}
void MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b500a650f7cc..89282f8fa146 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -51,9 +51,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index be78a2266268..60efe31fbaf8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 616a79bf831c..055ff3237218 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,4 +1,4 @@
-//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===//
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
index cd4afe8e2342..2b6cb9e4e461 100644
--- a/lib/Target/PIC16/CMakeLists.txt
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -10,7 +10,7 @@ tablegen(PIC16GenDAGISel.inc -gen-dag-isel)
tablegen(PIC16GenCallingConv.inc -gen-callingconv)
tablegen(PIC16GenSubtarget.inc -gen-subtarget)
-add_llvm_target(PIC16
+add_llvm_target(PIC16CodeGen
PIC16DebugInfo.cpp
PIC16InstrInfo.cpp
PIC16ISelDAGToDAG.cpp
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
index cee55f4f260f..08bb3e6f055b 100644
--- a/lib/Target/PIC16/PIC16.h
+++ b/lib/Target/PIC16/PIC16.h
@@ -58,13 +58,10 @@ namespace PIC16CC {
ESNames() {}
public:
~ESNames() {
- std::vector<char*>::iterator it = stk.end();
- it--;
- while(stk.end() != stk.begin())
+ while (!stk.empty())
{
- char* p = *it;
+ char* p = stk.back();
delete [] p;
- it--;
stk.pop_back();
}
}
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 54a6a28992bf..527b31d0cc9f 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -312,6 +312,16 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
computeRegisterProperties();
}
+std::pair<const TargetRegisterClass*, uint8_t>
+PIC16TargetLowering::findRepresentativeClass(EVT VT) const {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i16:
+ return std::make_pair(PIC16::FSR16RegisterClass, 1);
+ }
+}
+
// getOutFlag - Extract the flag result if the Op has it.
static SDValue getOutFlag(SDValue &Op) {
// Flag is the last value of the node.
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index 0a7506cb497f..d942af46a9e9 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -50,7 +50,7 @@ namespace llvm {
CALL, // PIC16 Call instruction
CALLW, // PIC16 CALLW instruction
SUBCC, // Compare for equality or inequality.
- SELECT_ICC, // Psuedo to be caught in schedular and expanded to brcond.
+ SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond.
BRCOND, // Conditional branch.
RET, // Return.
Dummy
@@ -181,6 +181,9 @@ namespace llvm {
// FIXME: The function never seems to be aligned.
return 1;
}
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
private:
// If the Node is a BUILD_PAIR representing a direct Address,
// then this function will return true.
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index e784f746f7f9..81257f3c4108 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -167,21 +167,6 @@ void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DestReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- if (MI.getOpcode() == PIC16::copy_fsr
- || MI.getOpcode() == PIC16::copy_w) {
- DestReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-
- return false;
-}
-
/// InsertBranch - Insert a branch into the end of the specified
/// MachineBasicBlock. This operands to this method are the same as those
/// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index a3a77f11ba16..661b335d3b6c 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -61,10 +61,6 @@ public:
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index 241170b11c2a..b6aa38f765ea 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
namespace {
struct MemSelOpt : public MachineFunctionPass {
static char ID;
- MemSelOpt() : MachineFunctionPass(&ID) {}
+ MemSelOpt() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(MachineLoopInfoID);
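Note on the MemSelOpt hunk above, and on the many other MachineFunctionPass(&ID) -> MachineFunctionPass(ID) hunks in this import: they appear to track the LLVM 2.8 change where pass base-class constructors take the ID char by reference rather than by address, so derived-class call sites simply drop the '&'. A toy, self-contained illustration of that shape (all class names below are made up, not the LLVM classes):

  #include <cstdio>
  struct OldStyleBase { explicit OldStyleBase(const void *) { std::puts("old: pass ID taken by address"); } };
  struct NewStyleBase { explicit NewStyleBase(char &)       { std::puts("new: pass ID taken by reference"); } };
  struct OldStylePass : OldStyleBase { static char ID; OldStylePass() : OldStyleBase(&ID) {} };
  struct NewStylePass : NewStyleBase { static char ID; NewStylePass() : NewStyleBase(ID) {} };
  char OldStylePass::ID = 0;
  char NewStylePass::ID = 0;
  int main() { OldStylePass o; NewStylePass n; (void)o; (void)n; return 0; }

Either way the pass is still identified by the address of the same static char; only the constructor's parameter type (and therefore the call-site spelling) differs.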
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
index 27f1cf572ae6..56f021157092 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -256,7 +256,7 @@ PIC16Cloner::cloneFunction(Function *OrgF) {
CloneAutos(OrgF);
// Now create the clone.
- ClonedF = CloneFunction(OrgF, VMap);
+ ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false);
// The new function should be for interrupt line. Therefore should have
// the name suffixed with IL and section attribute marked with IL.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
index e8b5aa45cdca..e7d67ce09629 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -35,7 +35,7 @@ namespace llvm {
class PIC16Cloner : public ModulePass {
public:
static char ID; // Class identification
- PIC16Cloner() : ModulePass(&ID) {}
+ PIC16Cloner() : ModulePass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraph>();
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
index 5ecb6aa55157..0f8928a4b5f5 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
@@ -171,8 +171,9 @@ void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) {
for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E;
++I) {
- if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
- || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
setColor(MI, ++IndirectCallColor);
break;
}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
index 5a2551fabcda..2f611e65de1f 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
@@ -39,7 +39,7 @@ namespace llvm {
unsigned IndirectCallColor;
public:
static char ID; // Class identification
- PIC16Overlay() : ModulePass(&ID) {
+ PIC16Overlay() : ModulePass(ID) {
OverlayStr = "Overlay=";
InterruptDepth = PIC16OVERLAY::StartInterruptColor;
IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index dff98d12c2ae..76de47fdf0f4 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -44,13 +44,10 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
return false;
}
-unsigned PIC16RegisterInfo::
+void PIC16RegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const
-{
- /* NOT YET IMPLEMENTED */
- return 0;
-}
+ RegScavenger *RS) const
+{ /* NOT YET IMPLEMENTED */ }
void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
{ /* NOT YET IMPLEMENTED */ }
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 5536a617d2be..20052b003442 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -44,9 +44,8 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool hasFP(const MachineFunction &MF) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS=NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS=NULL) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index e35dc579f2cd..c1a5663be931 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -327,6 +328,19 @@ namespace {
void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 52948c868b9c..e161d23600e2 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -10,7 +10,7 @@
// This file contains a pass that scans a machine function to determine which
// conditional branches need more than 16 bits of displacement to reach their
// target basic block. It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass. This
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
// pass should be run last, just before the assembly printer.
//
//===----------------------------------------------------------------------===//
@@ -31,7 +31,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(&ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {}
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -53,7 +53,8 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
}
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 155fba22d9d7..441db94581ae 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,4 +1,4 @@
-//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 361fa70fb4c4..df9ab52389ba 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -45,7 +45,7 @@ namespace {
public:
PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), TM(tm), MCE(mce) {}
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
@@ -110,7 +110,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
default:
MCE.emitWordBE(getBinaryCodeForInstr(MI));
break;
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d47d989b34c0..14d1b154a5c9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2467,18 +2467,31 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType());
- else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
// If this is an absolute destination address, use the munged value.
Callee = SDValue(Dest, 0);
- else {
+ needIndirectCall = false;
+ }
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+  // Use indirect calls for ALL function calls in JIT mode, since the

+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType());
+ needIndirectCall = false;
+ }
+ }
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+ needIndirectCall = false;
+ }
+ if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
@@ -3942,17 +3955,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1574aa3fb23a..c17108fa9230 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,8 +18,11 @@
#include "PPCGenInstrInfo.inc"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -36,67 +39,6 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
RI(*TM.getSubtargetImpl(), *this) {}
-bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& sourceSubIdx,
- unsigned& destSubIdx) const {
- sourceSubIdx = destSubIdx = 0; // No sub-registers.
-
- unsigned oc = MI.getOpcode();
- if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR ||
- oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid PPC OR instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::ADDI) { // addi r1, r2, 0
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid PPC ADDI instruction!");
- if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::ORI) { // ori r1, r2, 0
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid PPC ORI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid PPC FMR instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- } else if (oc == PPC::MCRF) { // mcrf cr1, cr2
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid PPC MCRF instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- return false;
-}
-
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -524,6 +466,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, /*Offset=*/0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
}
void
@@ -637,6 +587,14 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, /*Offset=*/0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
}
MachineInstr*
@@ -667,7 +625,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
- case PPC::DBG_LABEL:
+ case PPC::PROLOG_LABEL:
case PPC::EH_LABEL:
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index eadb21e21702..fc7b7b3cb897 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -82,12 +82,6 @@ public:
///
virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 63b4581a37f9..eb100ec75280 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1022,9 +1022,7 @@ let Uses = [RM] in {
}
}
-/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending.
-///
-/// Note that these are defined as pseudo-ops on the PPC970 because they are
+/// Note that FMR is defined as a pseudo-op on the PPC970 because such moves are
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
@@ -1032,10 +1030,6 @@ def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
[]>, // (set F4RC:$frD, F4RC:$frB)
PPC970_Unit_Pseudo;
-def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fextend F4RC:$frB))]>,
- PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
@@ -1476,10 +1470,13 @@ def : Pat<(extloadi16 iaddr:$src),
(LHZ iaddr:$src)>;
def : Pat<(extloadi16 xaddr:$src),
(LHZX xaddr:$src)>;
-def : Pat<(extloadf32 iaddr:$src),
- (FMRSD (LFS iaddr:$src))>;
-def : Pat<(extloadf32 xaddr:$src),
- (FMRSD (LFSX xaddr:$src))>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend F4RC:$src)),
+ (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4d6132a9ec50..653e143ba407 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -449,8 +449,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Get stack alignments.
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
- assert(MaxAlign <= TargetAlign &&
- "Dynamic alloca with large aligns not supported");
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
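Note on the lowerDynamicAlloc hunk above: the assert is swapped for report_fatal_error. The usual motivation (inferred here, not stated in the patch) is that asserts are compiled out in NDEBUG/release builds, whereas report_fatal_error always fires. A standalone demonstration of the assert half, with hypothetical alignment values:

  #define NDEBUG            // pretend this is a release build
  #include <cassert>
  #include <cstdio>
  int main() {
    unsigned MaxAlign = 32, TargetAlign = 16;   // hypothetical alignments
    // Under NDEBUG this assert expands to nothing, so the over-aligned case
    // would slip through silently; report_fatal_error() has no such mode.
    assert(MaxAlign <= TargetAlign && "Dynamic alloca with large aligns not supported");
    std::printf("reached past the (disabled) assert: MaxAlign=%u TargetAlign=%u\n",
                MaxAlign, TargetAlign);
    return 0;
  }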
@@ -580,10 +580,9 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-unsigned
+void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -622,14 +621,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
lowerDynamicAlloc(II, SPAdj, RS);
- return 0;
+ return;
}
// Special case for pseudo-op SPILL_CR.
if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return 0;
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -674,7 +673,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// The offset doesn't fit into a single register, scavenge one to build the
@@ -710,11 +709,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
OperandBase = OffsetOperandNo;
}
-
+
unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
- return 0;
}
/// VRRegNo - Map from a numbered VR register to its enum value.
@@ -1318,7 +1316,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
// Show update of SP.
if (NegFrameSize) {
@@ -1361,7 +1359,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
ReadyLabel = MMI.getContext().CreateTempSymbol();
// Mark effective beginning of when frame pointer is ready.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
(isPPC64 ? PPC::X1 : PPC::R1));
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index f026847a540b..890b24b9c0a8 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -63,9 +63,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 40914ba62a70..5d46065d96f2 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
, HasFSQRT(false)
, HasSTFIWX(false)
, HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
, DarwinVers(0) {
// Determine default and user specified characteristics
@@ -117,6 +118,9 @@ void PPCSubtarget::SetJITMode() {
// everything is. This matters for PPC64, which codegens in PIC mode without
// stubs.
HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 75fcf6238a27..00ec7474c9e3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -63,6 +63,7 @@ protected:
bool HasFSQRT;
bool HasSTFIWX;
bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
/// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -124,6 +125,9 @@ public:
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4d7ee08de1de..4faf8bcfd419 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1919,5 +1919,21 @@ something like the following, which eliminates a branch:
ret
.LBB0_2:
jmp foo # TAILCALL
+//===---------------------------------------------------------------------===//
+Given a branch where the two target blocks are identical ("ret i32 %b" in
+both), simplifycfg will simplify them away. But not so for a switch statement:
+
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ switch i32 %a, label %bb3 [
+ i32 4, label %bb
+ i32 6, label %bb
+ ]
+bb: ; preds = %entry, %entry
+ ret i32 %b
+
+bb3: ; preds = %entry
+ ret i32 %b
+}
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 9e148ada8853..aae5da856005 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "SPARC Delay Slot Filler";
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 88b0927b3550..1423b1e64d66 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -36,7 +36,7 @@ namespace {
static char ID;
explicit FPMover(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm) { }
virtual const char *getPassName() const {
return "Sparc Double-FP Move Fixer";
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 925d782d988b..764336665d0b 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,4 +1,4 @@
-//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 698923e3c9e0..4ea94c4cb560 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -84,7 +84,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_simm13(CN)) {
+ if (isInt<13>(CN->getSExtValue())) {
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
@@ -120,9 +120,9 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
- if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
- Predicate_simm13(Addr.getOperand(1).getNode()))
- return false; // Let the reg+imm pattern catch this!
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
Addr.getOperand(1).getOpcode() == SPISD::Lo)
return false; // Let the reg+imm pattern catch this!
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 3a4c80ad076a..7ede8e7ebbe4 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -28,46 +28,6 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
RI(ST, *this), Subtarget(ST) {
}
-static bool isZeroImm(const MachineOperand &op) {
- return op.isImm() && op.getImm() == 0;
-}
-
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSR, unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- // We look for 3 kinds of patterns here:
- // or with G0 or 0
- // add with G0 or 0
- // fmovs or FpMOVD (pseudo double move).
- if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) {
- if (MI.getOperand(1).getReg() == SP::G0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).getReg() == SP::G0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) &&
- isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD ||
- MI.getOpcode() == SP::FMOVD) {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 133471857bad..c00bd2198765 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -43,12 +43,6 @@ public:
///
virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index ddadd51a93a4..467ed48487ad 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -43,17 +43,9 @@ def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
-def simm11 : PatLeaf<(imm), [{
- // simm11 predicate - True if the imm fits in a 11-bit sign extended field.
- return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
- (int)N->getZExtValue();
-}]>;
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
-def simm13 : PatLeaf<(imm), [{
- // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
- return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
- (int)N->getZExtValue();
-}]>;
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
def LO10 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
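Note on the simm11/simm13 predicates above (and on the matching SparcISelDAGToDAG hunks earlier): they now delegate to LLVM's isInt<N>() helper from Support/MathExtras.h, which is essentially an N-bit signed-range check. A standalone sketch under that assumption (fitsSignedBits is a made-up stand-in, not the real implementation):

  #include <cassert>
  #include <cstdint>
  template <unsigned N>
  bool fitsSignedBits(int64_t x) {
    // True when x is representable as an N-bit two's-complement integer.
    return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
  }
  int main() {
    assert(fitsSignedBits<13>(4095));     // largest simm13
    assert(fitsSignedBits<13>(-4096));    // smallest simm13
    assert(!fitsSignedBits<13>(4096));    // one past the top
    return 0;
  }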
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 427cc7fd4577..c85db20d2b74 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -69,10 +69,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -108,7 +107,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(SP::G1, false);
MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
- return 0;
}
void SparcRegisterInfo::
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 9f0cda707b3e..020ce567c956 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,9 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c03864fe41e4..367bed3a8539 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -141,31 +141,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool
-SystemZInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case SystemZ::MOV32rr:
- case SystemZ::MOV64rr:
- case SystemZ::MOV64rrP:
- case SystemZ::MOV128rr:
- case SystemZ::FMOV32rr:
- case SystemZ::FMOV64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
-}
-
unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 0559619248a6..c248f2489c49 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -65,9 +65,6 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index ae96b0b08ff6..f8d3e6ac8a6f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -92,10 +92,9 @@ int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
return Offset;
}
-unsigned
+void
SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unxpected");
unsigned i = 0;
@@ -117,13 +116,13 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is either a 12-bit unsigned or 20-bit signed integer.
// FIXME: handle "too long" displacements.
- int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+ int Offset =
+ getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
// Check whether displacement is too long to fit into 12 bit zext field.
MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
}
void
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 670025f86e08..5dae865cb79a 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -34,7 +34,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const { return true; }
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
bool hasFP(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
@@ -43,9 +43,8 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 5870d8a87004..f35c96dadcee 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -34,8 +34,7 @@ using namespace llvm;
// Handle the Pass registration stuff necessary to use TargetData's.
// Register the default SparcV9 implementation...
-static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false,
- true);
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true);
char TargetData::ID = 0;
//===----------------------------------------------------------------------===//
@@ -98,8 +97,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
//===----------------------------------------------------------------------===//
TargetAlignElem
-TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width) {
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
TargetAlignElem retval;
retval.AlignType = align_type;
@@ -197,10 +196,10 @@ void TargetData::init(StringRef Desc) {
}
unsigned Size = getInt(Specifier.substr(1));
Split = Token.split(':');
- unsigned char ABIAlign = getInt(Split.first) / 8;
+ unsigned ABIAlign = getInt(Split.first) / 8;
Split = Split.second.split(':');
- unsigned char PrefAlign = getInt(Split.first) / 8;
+ unsigned PrefAlign = getInt(Split.first) / 8;
if (PrefAlign == 0)
PrefAlign = ABIAlign;
setAlignment(AlignType, ABIAlign, PrefAlign, Size);
@@ -227,19 +226,19 @@ void TargetData::init(StringRef Desc) {
///
/// @note This has to exist, because this is a pass, but it should never be
/// used.
-TargetData::TargetData() : ImmutablePass(&ID) {
+TargetData::TargetData() : ImmutablePass(ID) {
report_fatal_error("Bad TargetData ctor used. "
"Tool did not specify a TargetData to use?");
}
TargetData::TargetData(const Module *M)
- : ImmutablePass(&ID) {
+ : ImmutablePass(ID) {
init(M->getDataLayout());
}
void
-TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width) {
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
if (Alignments[i].AlignType == align_type &&
@@ -455,15 +454,6 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
case Type::StructTyID:
// Get the layout annotation... which is lazily created on demand.
return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::UnionTyID: {
- const UnionType *UnTy = cast<UnionType>(Ty);
- uint64_t Size = 0;
- for (UnionType::element_iterator i = UnTy->element_begin(),
- e = UnTy->element_end(); i != e; ++i) {
- Size = std::max(Size, getTypeSizeInBits(*i));
- }
- return Size;
- }
case Type::IntegerTyID:
return cast<IntegerType>(Ty)->getBitWidth();
case Type::VoidTyID:
@@ -496,7 +486,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
== false) for the requested type \a Ty.
*/
-unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
int AlignType = -1;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -518,18 +508,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
// Get the layout annotation... which is lazily created on demand.
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
- return std::max(Align, (unsigned)Layout->getAlignment());
- }
- case Type::UnionTyID: {
- const UnionType *UnTy = cast<UnionType>(Ty);
- unsigned Align = 1;
-
- // Unions need the maximum alignment of all their entries
- for (UnionType::element_iterator i = UnTy->element_begin(),
- e = UnTy->element_end(); i != e; ++i) {
- Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref));
- }
- return Align;
+ return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
case Type::VoidTyID:
@@ -556,18 +535,18 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
abi_or_pref, Ty);
}
-unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
return getAlignment(Ty, true);
}
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
-unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
}
-unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
if (Alignments[i].AlignType == STACK_ALIGN)
return Alignments[i].ABIAlign;
@@ -575,12 +554,12 @@ unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
return getABITypeAlignment(Ty);
}
-unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
return getAlignment(Ty, false);
}
-unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
- unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
return Log2_32(Align);
}
@@ -615,18 +594,13 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
// Update Ty to refer to current element
Ty = STy->getElementType(FieldNo);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) {
- unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
-
- // Offset into union is canonically 0, but type changes
- Ty = UnTy->getElementType(FieldNo);
} else {
// Update Ty to refer to current element
Ty = cast<SequentialType>(Ty)->getElementType();
// Get the array index and the size of each array element.
if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
- Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
}
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 47c91df1400e..705b1c097e55 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -30,7 +30,8 @@ namespace llvm {
bool NoFramePointerElimNonLeaf;
bool NoExcessFPPrecision;
bool UnsafeFPMath;
- bool FiniteOnlyFPMathOption;
+ bool NoInfsFPMath;
+ bool NoNaNsFPMath;
bool HonorSignDependentRoundingFPMathOption;
bool UseSoftFloat;
FloatABI::ABIType FloatABIType;
@@ -80,9 +81,14 @@ EnableUnsafeFPMath("enable-unsafe-fp-math",
cl::location(UnsafeFPMath),
cl::init(false));
static cl::opt<bool, true>
-EnableFiniteOnlyFPMath("enable-finite-only-fp-math",
- cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"),
- cl::location(FiniteOnlyFPMathOption),
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::location(NoInfsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::location(NoNaNsFPMath),
cl::init(false));
static cl::opt<bool, true>
EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
@@ -290,12 +296,6 @@ namespace llvm {
/// result is "less precise" than doing those operations individually.
bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
- /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
- /// option is specified on the command line. If this returns false (default),
- /// the code generator is not allowed to assume that FP arithmetic arguments
- /// and results are never NaNs or +-Infs.
- bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; }
-
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool HonorSignDependentRoundingFPMath() {
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 49bfad54136d..55f222c7c1c9 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -63,7 +63,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// getAllocatableSetForRC - Toggle the bits that represent allocatable
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
- const TargetRegisterClass *RC, BitVector &R){
+ const TargetRegisterClass *RC, BitVector &R){
for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
E = RC->allocation_order_end(MF); I != E; ++I)
R.set(*I);
@@ -74,12 +74,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
BitVector Allocatable(NumRegs);
if (RC) {
getAllocatableSetForRC(MF, RC, Allocatable);
- return Allocatable;
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ getAllocatableSetForRC(MF, *I, Allocatable);
}
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- getAllocatableSetForRC(MF, *I, Allocatable);
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable ^= Reserved & Allocatable;
+
return Allocatable;
}
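Note on the getAllocatableSet hunk above: reserved registers are masked out with "Allocatable ^= Reserved & Allocatable". Bit by bit, that is the same as clearing every bit that is set in Reserved. A small standalone check of the equivalence, with toy integer masks standing in for the BitVectors:

  #include <cassert>
  #include <cstdint>
  int main() {
    const uint32_t allocatable = 0xB6;   // hypothetical allocatable-register mask
    const uint32_t reserved    = 0x23;   // hypothetical reserved-register mask
    uint32_t a = allocatable;
    a ^= reserved & a;                   // the idiom used in the patch
    assert(a == (allocatable & ~reserved));
    return 0;
  }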
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f1e66ab9d2c3..f8588d818b75 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -9,6 +9,8 @@
#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -19,6 +21,7 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
@@ -28,6 +31,7 @@ struct X86Operand;
class X86ATTAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
protected:
unsigned Is64Bit : 1;
@@ -37,8 +41,6 @@ private:
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -48,13 +50,14 @@ private:
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- void InstructionCleanup(MCInst &Inst);
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst);
- /// @name Auto-generated Match Functions
+ /// @name Auto-generated Matcher Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
+ unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
bool MatchInstructionImpl(
const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
@@ -62,27 +65,32 @@ private:
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<X86Subtarget>()));
+ }
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
+
class X86_32ATTAsmParser : public X86ATTAsmParser {
public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = false;
}
};
class X86_64ATTAsmParser : public X86ATTAsmParser {
public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = true;
}
};
@@ -90,7 +98,7 @@ public:
} // end anonymous namespace
/// @name Auto-generated Match Functions
-/// {
+/// {
static unsigned MatchRegisterName(StringRef Name);
@@ -109,7 +117,7 @@ struct X86Operand : public MCParsedAsmOperand {
} Kind;
SMLoc StartLoc, EndLoc;
-
+
union {
struct {
const char *Data;
@@ -141,6 +149,8 @@ struct X86Operand : public MCParsedAsmOperand {
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ virtual void dump(raw_ostream &OS) const {}
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -185,7 +195,7 @@ struct X86Operand : public MCParsedAsmOperand {
bool isToken() const {return Kind == Token; }
bool isImm() const { return Kind == Immediate; }
-
+
bool isImmSExti16i8() const {
if (!isImm())
return false;
@@ -260,10 +270,6 @@ struct X86Operand : public MCParsedAsmOperand {
!getMemIndexReg() && getMemScale() == 1;
}
- bool isNoSegMem() const {
- return Kind == Memory && !getMemSegReg();
- }
-
bool isReg() const { return Kind == Register; }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
@@ -298,14 +304,6 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
- void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
- assert((N == 4) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
- Inst.addOperand(MCOperand::CreateImm(getMemScale()));
- Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- addExpr(Inst, getMemDisp());
- }
-
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
X86Operand *Res = new X86Operand(Token, Loc, Loc);
Res->Tok.Data = Str.data();
@@ -376,13 +374,19 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
-
+
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
+ // can be also checked.
+ if (RegNo == X86::RIZ && !Is64Bit)
+ return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+
// Parse %st(1) and "%st" as "%st(0)"
if (RegNo == 0 && Tok.getString() == "st") {
RegNo = X86::ST0;
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
-
+
// Check to see if we have '(4)' after %st.
if (getLexer().isNot(AsmToken::LParen))
return false;
@@ -403,15 +407,15 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
case 7: RegNo = X86::ST7; break;
default: return Error(IntTok.getLoc(), "invalid stack index");
}
-
+
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
-
+
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat ')'
return false;
}
-
+
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
@@ -426,14 +430,14 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
}
-
+
if (RegNo != 0) {
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat it.
return false;
}
}
-
+
if (RegNo == 0)
return Error(Tok.getLoc(), "invalid register name");
@@ -452,13 +456,17 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
-
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
-
+
+
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -477,7 +485,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
-
+
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
@@ -486,7 +494,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
@@ -495,7 +503,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
} else {
@@ -503,17 +511,17 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// so we have to eat the ( to see beyond it.
SMLoc LParenLoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat the '('.
-
+
if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
// Nothing to do here, fall into the code below with the '(' part of the
// memory operand consumed.
} else {
SMLoc ExprEnd;
-
+
      // It must be a parenthesized expression; parse it now.
if (getParser().ParseParenExpression(Disp, ExprEnd))
return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
@@ -522,21 +530,25 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
}
}
-
+
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
-
+
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
}
-
+
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -545,11 +557,11 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// correctly.
//
    // Note that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't.
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(IndexReg, L, L)) return 0;
-
+
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
// ::= ',' [scale-expression]
@@ -566,7 +578,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
int64_t ScaleVal;
if (getParser().ParseAbsoluteExpression(ScaleVal))
return 0;
-
+
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
@@ -576,19 +588,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
}
} else if (getLexer().isNot(AsmToken::RParen)) {
- // Otherwise we have the unsupported form of a scale amount without an
+ // A scale amount without an index is ignored.
// index.
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
if (getParser().ParseAbsoluteExpression(Value))
return 0;
-
- Error(Loc, "cannot have scale factor without index register");
- return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
}
}
-
+
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
@@ -596,7 +609,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
-
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
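
The hunks above change two operand rules in ParseMemOperand: the pseudo registers %eiz/%riz (the "no index" encodings, e.g. "(%eax,%eiz,1)") are accepted only in the index slot, and a scale written without an index register, as in "(%ebx,2)", is now warned about and dropped instead of being rejected. The following stand-alone sketch restates those rules with plain strings; the struct and names are hypothetical and are not the LLVM MC API.

  #include <cstdio>
  #include <string>

  // Illustrative only: a stand-alone restatement of the operand rules the
  // parser above now enforces, using plain strings instead of X86Operand.
  struct MemOperand {
    std::string Base, Index;   // empty string means "no register"
    long Disp;
    int Scale;
  };

  static bool IsIndexOnlyReg(const std::string &R) {
    return R == "eiz" || R == "riz";
  }

  // Returns true if the operand is acceptable; Msg receives any diagnostic.
  static bool CheckMem(MemOperand &M, std::string &Msg) {
    if (IsIndexOnlyReg(M.Base)) {
      Msg = "eiz and riz can only be used as index registers";
      return false;
    }
    if (M.Scale != 1 && M.Scale != 2 && M.Scale != 4 && M.Scale != 8) {
      Msg = "scale factor in address must be 1, 2, 4 or 8";
      return false;
    }
    if (M.Index.empty() && M.Scale != 1) {
      // Previously an error, now only a warning; the scale is dropped.
      Msg = "warning: scale factor without index register is ignored";
      M.Scale = 1;
    }
    return true;
  }

  int main() {
    MemOperand M = { "eax", "eiz", 1, 4 };   // "1(%eax,%eiz,4)"
    std::string Msg;
    std::printf("%s\n", CheckMem(M, Msg) ? "accepted" : Msg.c_str());
  }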
@@ -743,6 +756,23 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
}
+
+ // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
+ if (PatchedName.startswith("vpclmul")) {
+ unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
+ PatchedName.slice(7, PatchedName.size() - 2))
+ .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
+ .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
+ .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
+ .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
+ .Default(~0U);
+ if (CLMULQuadWordSelect != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
+ getParser().getContext());
+ assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
+ PatchedName = "vpclmulqdq";
+ }
+ }
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
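
The vpclmul hack above folds the quadword selector spelled in the mnemonic into the immediate operand of vpclmulqdq, so for example "vpclmulhqhqdq %xmm2, %xmm1, %xmm0" is parsed as "vpclmulqdq $0x11, %xmm2, %xmm1, %xmm0". A stand-alone sketch of the same mapping follows; the selector/immediate pairs are copied from the patch, everything else is hypothetical illustration rather than the parser's code.

  #include <cstdio>
  #include <string>

  // Illustrative only: maps the "vpclmul<sel>dq" aliases handled above to the
  // immediate operand of vpclmulqdq.
  static int ClmulSelectorToImm(const std::string &Sel) {
    if (Sel == "lqlq") return 0x00;  // src1[63:0],   src2[63:0]
    if (Sel == "hqlq") return 0x01;  // src1[127:64], src2[63:0]
    if (Sel == "lqhq") return 0x10;  // src1[63:0],   src2[127:64]
    if (Sel == "hqhq") return 0x11;  // src1[127:64], src2[127:64]
    return -1;                       // not one of the aliases
  }

  int main() {
    std::printf("vpclmulhqhqdq -> vpclmulqdq $0x%02x\n",
                ClmulSelectorToImm("hqhq"));
  }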
@@ -785,6 +815,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
+ // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+
// FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
// "f{mul*,add*,sub*,div*} $op"
if ((Name.startswith("fmul") || Name.startswith("fadd") ||
@@ -796,6 +840,16 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 2);
}
+ // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
+ // B".
+ if (Name.startswith("imul") && Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isImm() &&
+ static_cast<X86Operand*>(Operands.back())->isReg()) {
+ X86Operand *Op = static_cast<X86Operand*>(Operands.back());
+ Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
+ Op->getEndLoc()));
+ }
+
return false;
}
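
The imul hack above rewrites the two-operand immediate form into the three-operand form by duplicating the register operand, so "imul $12, %edx" is treated as "imul $12, %edx, %edx". A minimal sketch of the same rewrite over plain strings follows; the real code works on X86Operand pointers and counts the mnemonic token itself in Operands, which is why it checks for size 3 rather than 2.

  #include <string>
  #include <vector>

  // Illustrative only: mirrors the imul normalization above with plain strings
  // instead of X86Operand. A two-operand "imul $imm, %reg" gets its register
  // duplicated so it matches the three-operand form "imul $imm, %reg, %reg".
  static void NormalizeImul(const std::string &Mnemonic,
                            std::vector<std::string> &Ops) {
    if (Mnemonic.compare(0, 4, "imul") == 0 && Ops.size() == 2 &&
        !Ops[0].empty() && Ops[0][0] == '$' &&         // immediate first
        !Ops[1].empty() && Ops[1][0] == '%')           // register second
      Ops.push_back(Ops.back());                       // duplicate the register
  }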
@@ -819,7 +873,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
@@ -831,82 +885,32 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
-/// imm operand, to having "rm" or "mr" operands with the offset in the disp
-/// field.
-static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
- bool isMR) {
- MCOperand Disp = Inst.getOperand(0);
-
- // Start over with an empty instruction.
- Inst = MCInst();
- Inst.setOpcode(Opc);
-
- if (!isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-
- // Add the mem operand.
- Inst.addOperand(MCOperand::CreateReg(0)); // Segment
- Inst.addOperand(MCOperand::CreateImm(1)); // Scale
- Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
- Inst.addOperand(Disp); // Displacement
- Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
-
- if (isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-}
-
-// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
-// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
-// proper mechanism for supporting (ambiguous) feature dependent instructions.
-void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
- if (!Is64Bit) return;
-
- switch (Inst.getOpcode()) {
- case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
- case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
- case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
- case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
- case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
- case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
- case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
- case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
-
- // moffset instructions are x86-32 only.
- case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
- case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
- case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
- case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
- case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
- case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
- }
-}
bool
-X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
+X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*>
&Operands,
MCInst &Inst) {
+  assert(!Operands.empty() && "Unexpected empty operand list!");
+
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
// First, try a direct match.
if (!MatchInstructionImpl(Operands, Inst))
return false;
- // Ignore anything which is obviously not a suffix match.
- if (Operands.size() == 0)
- return true;
- X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
- if (!Op->isToken() || Op->getToken().size() > 15)
- return true;
-
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
// type. However, that requires substantially more matcher support than the
// following hack.
// Change the operand to point to a temporary token.
- char Tmp[16];
StringRef Base = Op->getToken();
- memcpy(Tmp, Base.data(), Base.size());
- Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
// Check for the various suffix matches.
Tmp[Base.size()] = 'b';
@@ -928,6 +932,38 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
return false;
// Otherwise, the match failed.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (MatchB + MatchW + MatchL + MatchQ != 4) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (!MatchB)
+ MatchChars[NumMatches++] = 'b';
+ if (!MatchW)
+ MatchChars[NumMatches++] = 'w';
+ if (!MatchL)
+ MatchChars[NumMatches++] = 'l';
+ if (!MatchQ)
+ MatchChars[NumMatches++] = 'q';
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ } else {
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+ }
+
return true;
}
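
The new diagnostic path above records which of the b/w/l/q suffixed mnemonics matched and, when more than one did, formats them into a single message. As a small self-contained illustration of that formatting loop (plain std::string, no MC matcher types, names hypothetical):

  #include <sstream>
  #include <string>
  #include <vector>

  // Illustrative only: rebuilds the "ambiguous instructions require an explicit
  // suffix" diagnostic emitted above. Suffixes holds the suffix letters whose
  // suffixed mnemonic matched.
  static std::string AmbiguousSuffixMessage(const std::string &Base,
                                            const std::vector<char> &Suffixes) {
    std::ostringstream OS;
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != Suffixes.size(); ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == Suffixes.size())
        OS << "or ";
      OS << "'" << Base << Suffixes[i] << "'";
    }
    OS << ")";
    return OS.str();
  }

  // AmbiguousSuffixMessage("add", {'w', 'l', 'q'}) yields:
  //   ambiguous instructions require an explicit suffix
  //   (could be 'addw', 'addl', or 'addq')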
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index b70a587ec4e2..033973eeeff9 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -2,8 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMX86AsmPrinter
X86ATTInstPrinter.cpp
- X86AsmPrinter.cpp
X86IntelInstPrinter.cpp
- X86MCInstLower.cpp
+ X86InstComments.cpp
)
add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index f2cdb5ba55eb..554b96c96e0e 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -31,6 +32,10 @@ using namespace llvm;
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 3be4bae5bec2..eb986643014c 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -56,6 +56,9 @@ public:
void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
new file mode 100644
index 000000000000..da9d5a3579e5
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
@@ -0,0 +1,232 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "X86GenInstrNames.inc"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "../X86ShuffleDecode.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<unsigned, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(2, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(2, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ DecodeUNPCKLPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ DecodeUNPCKLPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ DecodeUNPCKHPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ DecodeUNPCKHPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
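
The comment printing above relies on the X86ShuffleDecode helpers (not part of this hunk) to expand an immediate into a per-element mask, which is then rendered as spans of source elements. As a hedged, stand-alone illustration of the pshufd case: destination element i is source element (Imm >> (2*i)) & 3, and the resulting verbose-asm comment reads like "xmm1 = xmm0[3,2,1,0]" for an immediate of 0x1b. The sketch below assumes that decode rule and is not the decoder shipped with the patch.

  #include <cstdio>
  #include <vector>

  // Illustrative only: expand a pshufd immediate into a 4-element shuffle mask
  // and print it in the comment style produced by EmitAnyX86InstComments.
  static std::vector<unsigned> DecodePSHUFD(unsigned Imm) {
    std::vector<unsigned> Mask;
    for (unsigned i = 0; i != 4; ++i)
      Mask.push_back((Imm >> (2 * i)) & 3);
    return Mask;
  }

  int main() {
    std::vector<unsigned> Mask = DecodePSHUFD(0x1b);
    std::printf("xmm1 = xmm0[%u,%u,%u,%u]\n",
                Mask[0], Mask[1], Mask[2], Mask[3]);
  }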
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h
new file mode 100644
index 000000000000..6b86db4f9e5c
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index a632047f6592..5625b0ea618f 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -30,6 +31,10 @@ using namespace llvm;
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index 4d680744dd60..6f120322742b 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -64,6 +64,10 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "DWORD PTR ";
printMemReference(MI, OpNo, O);
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 133482036ce1..e9399f5c8322 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -18,23 +18,24 @@ tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
set(sources
SSEDomainFix.cpp
X86AsmBackend.cpp
- X86CodeEmitter.cpp
+ X86AsmPrinter.cpp
X86COFFMachineModuleInfo.cpp
+ X86CodeEmitter.cpp
X86ELFWriterInfo.cpp
+ X86FastISel.cpp
X86FloatingPoint.cpp
- X86FloatingPointRegKill.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
+ X86MCInstLower.cpp
X86RegisterInfo.cpp
+ X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
- X86FastISel.cpp
- X86SelectionDAGInfo.cpp
)
if( CMAKE_CL_64 )
@@ -49,4 +50,3 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
index be28e8b394a4..39efd2dbcf1a 100644
--- a/lib/Target/X86/README-FPStack.txt
+++ b/lib/Target/X86/README-FPStack.txt
@@ -27,8 +27,8 @@ def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
//===---------------------------------------------------------------------===//
-The FP stackifier needs to be global. Also, it should handle simple permutates
-to reduce number of shuffle instructions, e.g. turning:
+The FP stackifier should handle simple permutations to reduce the number of
+shuffle instructions, e.g. turning:
fld P -> fld Q
fld Q fld P
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index b6aba93f3738..f96b22f1e204 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -2,8 +2,46 @@
// Random ideas for the X86 backend: SSE-specific stuff.
//===---------------------------------------------------------------------===//
-- Consider eliminating the unaligned SSE load intrinsics, replacing them with
- unaligned LLVM load instructions.
+//===---------------------------------------------------------------------===//
+
+SSE Variable shift can be custom lowered to something like this, which uses a
+small table + unaligned load + shuffle instead of going through memory.
+
+__m128i_shift_right:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+
+...
+__m128i shift_right(__m128i value, unsigned long offset) {
+ return _mm_shuffle_epi8(value,
+ _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset)));
+}
+
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers; we should pattern
+match them. Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+ return A+B;
+}
+
+into:
+
+_f32:
+ movdqa %xmm0, %xmm2
+ addss %xmm1, %xmm2
+ pshufd $16, %xmm2, %xmm2
+ pshufd $1, %xmm1, %xmm1
+ pshufd $1, %xmm0, %xmm0
+ addss %xmm1, %xmm0
+ pshufd $16, %xmm0, %xmm1
+ movdqa %xmm2, %xmm0
+ unpcklps %xmm1, %xmm0
+ ret
+
+seems silly.
+
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index efc0cd82f23e..a305ae6ec550 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1135,13 +1135,6 @@ void test(double *P) {
//===---------------------------------------------------------------------===//
-handling llvm.memory.barrier on pre SSE2 cpus
-
-should generate:
-lock ; mov %esp, %esp
-
-//===---------------------------------------------------------------------===//
-
The generated code on x86 for checking for signed overflow on a multiply the
obvious way is much longer than it needs to be.
@@ -1870,3 +1863,100 @@ The code produced by gcc is 3 bytes shorter. This sort of construct often
shows up with bitfields.
//===---------------------------------------------------------------------===//
+
+Take the following C code:
+int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %tmp = xor i32 %b, %a ; <i32> [#uses=1]
+ %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1]
+ %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1]
+ %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
+
+And the following x86 code:
+ xorl %esi, %edi
+ testb $-1, %dil
+ sete %al
+ movzbl %al, %eax
+ ret
+
+A cmpb instead of the xorl+testb would be one instruction shorter.
+
+//===---------------------------------------------------------------------===//
+
+Given the following C code:
+int f(int a, int b) { return (signed char)a == (signed char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %sext = shl i32 %a, 24 ; <i32> [#uses=1]
+ %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1]
+ %sext6 = shl i32 %b, 24 ; <i32> [#uses=1]
+ %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1]
+ %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1]
+ %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
+
+And the following x86 code:
+ movsbl %sil, %eax
+ movsbl %dil, %ecx
+ cmpl %eax, %ecx
+ sete %al
+ movzbl %al, %eax
+ ret
+
+
+It should be possible to eliminate the sign extensions.
+
+//===---------------------------------------------------------------------===//
+
+LLVM misses a load+store narrowing opportunity in this code:
+
+%struct.bf = type { i64, i16, i16, i32 }
+
+@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2]
+
+define void @t1() nounwind ssp {
+entry:
+ %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+ %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
+ %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2]
+ %3 = load i32* %2, align 1 ; <i32> [#uses=1]
+ %4 = and i32 %3, -65537 ; <i32> [#uses=1]
+ store i32 %4, i32* %2, align 1
+ %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+ %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
+ %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2]
+ %8 = load i32* %7, align 1 ; <i32> [#uses=1]
+ %9 = and i32 %8, -131073 ; <i32> [#uses=1]
+ store i32 %9, i32* %7, align 1
+ ret void
+}
+
+LLVM currently emits this:
+
+ movq bfi(%rip), %rax
+ andl $-65537, 8(%rax)
+ movq bfi(%rip), %rax
+ andl $-131073, 8(%rax)
+ ret
+
+It could narrow the loads and stores to emit this:
+
+ movq bfi(%rip), %rax
+ andb $-2, 10(%rax)
+ movq bfi(%rip), %rax
+ andb $-3, 10(%rax)
+ ret
+
+The trouble is that there is a TokenFactor between the store and the
+load, making it non-trivial to determine if there's anything between
+the load and the store which would prohibit narrowing.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index dab070e1febd..13680c592e01 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -115,7 +115,7 @@ class SSEDomainFixPass : public MachineFunctionPass {
unsigned Distance;
public:
- SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+ SSEDomainFixPass() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 677781d3730e..27e88505150b 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -49,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// crossings.
FunctionPass *createSSEDomainFixPass();
-/// createX87FPRegKillInserterPass - This function returns a pass which
-/// inserts FP_REG_KILL instructions where needed.
-///
-FunctionPass *createX87FPRegKillInserterPass();
-
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a53f973c1c43..a19f1acffaca 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -67,6 +67,8 @@ def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
+def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
+ "Enable carry-less multiplication instructions">;
def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
@@ -180,8 +182,6 @@ include "X86CallingConv.td"
// Currently the X86 assembly parser only supports ATT syntax.
def ATTAsmParser : AsmParser {
string AsmParserClassName = "ATTAsmParser";
- string AsmParserInstCleanup = "InstructionCleanup";
- string MatchInstructionName = "MatchInstructionImpl";
int Variant = 0;
// Discard comments in assembly strings.
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 2cf65c11f94a..69dc967f9d88 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -11,9 +11,11 @@
#include "X86.h"
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/ELFObjectWriter.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MachObjectWriter.h"
@@ -190,10 +192,6 @@ public:
HasScatteredSymbols = true;
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return 0;
- }
-
bool isVirtualSection(const MCSection &Section) const {
const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
return SE.getType() == MCSectionELF::SHT_NOBITS;;
@@ -204,12 +202,43 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_32AsmBackend(const Target &T)
: ELFX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return new ELFObjectWriter(OS, /*Is64Bit=*/false,
+ /*IsLittleEndian=*/true,
+ /*HasRelocationAddend=*/false);
+ }
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T)
: ELFX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return new ELFObjectWriter(OS, /*Is64Bit=*/true,
+ /*IsLittleEndian=*/true,
+ /*HasRelocationAddend=*/true);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit)
+ : X86AsmBackend(T)
+ , Is64Bit(is64Bit) {
+ HasScatteredSymbols = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createWinCOFFObjectWriter(OS, Is64Bit);
+ }
+
+ bool isVirtualSection(const MCSection &Section) const {
+ const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
+ return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ }
};
class DarwinX86AsmBackend : public X86AsmBackend {
@@ -290,6 +319,10 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_32AsmBackend(T);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return new WindowsX86AsmBackend(T, false);
default:
return new ELFX86_32AsmBackend(T);
}
@@ -300,6 +333,10 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return new WindowsX86AsmBackend(T, true);
default:
return new ELFX86_64AsmBackend(T);
}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 08e6486d5b7a..20110ad788cd 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86ATTInstPrinter.h"
-#include "X86IntelInstPrinter.h"
+#include "AsmPrinter/X86ATTInstPrinter.h"
+#include "AsmPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
@@ -24,6 +24,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -35,6 +36,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
@@ -218,6 +220,10 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
@@ -655,6 +661,47 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
//===----------------------------------------------------------------------===//
// Target Registry Stuff
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index b5a7f8dc321a..e61be66c75a2 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -14,9 +14,9 @@
#ifndef X86ASMPRINTER_H
#define X86ASMPRINTER_H
-#include "../X86.h"
-#include "../X86MachineFunctionInfo.h"
-#include "../X86TargetMachine.h"
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a6a1e4e573cf..e3409effc318 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -33,13 +33,19 @@ def RetCC_X86Common : CallingConv<[
CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
-
- // Vector types are returned in XMM0 and XMM1, when they fit. XMMM2 and XMM3
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+  // 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>,
+
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
@@ -164,11 +170,16 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
+
+ // The first 8 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
+
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80], CCAssignToStack<0, 0>>,
@@ -176,6 +187,10 @@ def CC_X86_64_C : CallingConv<[
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
@@ -271,9 +286,18 @@ def CC_X86_32_Common : CallingConv<[
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
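
The calling-convention additions above assign 256-bit vector arguments and return values to the YMM registers when the subtarget has AVX, and otherwise let such arguments fall through to the new 32-byte, 32-byte-aligned stack-slot rule. A minimal C++ sketch of what that implies at the source level, assuming a compiler building with AVX enabled; the function and its body are purely illustrative.

  #include <immintrin.h>

  // Illustrative only: with AVX available, the v8f32 (__m256) argument and the
  // return value below are assigned to %ymm0 under the rules added above;
  // without AVX the argument instead takes a 32-byte aligned stack slot.
  __m256 twice(__m256 v) {
    return _mm256_add_ps(v, v);
  }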
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f13669bd741d..824021c0c882 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -53,12 +53,12 @@ namespace {
public:
static char ID;
explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(&ID), II(0), TD(0), TM(tm),
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(false),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
Emitter(X86TargetMachine &tm, CodeEmitter &mce,
const X86InstrInfo &ii, const TargetData &td, bool is64)
- : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm),
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -146,6 +146,103 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+/// determineREX - Determine if the MachineInstr has to be encoded with an X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const TargetInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
/// emitPCRelativeBlockAddress - This method keeps track of the information
/// necessary to resolve the address of this block later and emits a dummy
/// value.
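
determineREX() above only ORs together the four REX payload bits; emitInstruction() later prepends the fixed 0100 pattern via "0x40 | REX". As a small worked sketch of the byte layout (W selects 64-bit operand size, R/X/B extend the ModRM reg, SIB index, and r/m or base fields), consider "movq %rax, %r8", which needs W and B and so is encoded with the 0x49 prefix (49 89 c0). The helper below is a stand-alone illustration, not the emitter's code.

  #include <cstdint>
  #include <cstdio>

  // Illustrative only: compose a full REX prefix byte, 0100WRXB.
  static uint8_t MakeREX(bool W, bool R, bool X, bool B) {
    return 0x40 | (W << 3) | (R << 2) | (X << 1) | (unsigned)B;
  }

  int main() {
    // movq %rax, %r8: 64-bit operand (W) plus an extended r/m register (B).
    std::printf("0x%02x\n", MakeREX(true, false, false, true));  // 0x49
  }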
@@ -569,7 +666,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// Handle REX prefix.
if (Is64BitMode) {
- if (unsigned REX = X86InstrInfo::determineREX(MI))
+ if (unsigned REX = determineREX(MI))
MCE.emitByte(0x40 | REX);
}
@@ -605,24 +702,29 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// base address.
switch (Opcode) {
default:
- llvm_unreachable("psuedo instructions should be removed before code"
+ llvm_unreachable("pseudo instructions should be removed before code"
" emission");
break;
+ // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
+ // to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
case TargetOpcode::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
if (MI.getOperand(0).getSymbolName()[0])
report_fatal_error("JIT does not support inline asm!");
break;
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::GC_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
-
+
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
// This emits the "call" portion of this pseudo instruction.
@@ -674,7 +776,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+ Opcode == X86::WINCALL64pcrel32) {
// Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm();
Imm = Imm - MCE.getCurrentPCValue() - 4;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ce1370763b77..0c70eec4827f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -960,9 +960,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // Fold the common case of a conditional branch with a comparison.
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined on other blocks may not have
+ // initialized registers).
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse()) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
@@ -1058,10 +1060,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
- unsigned Src, Dst, SrcSR, DstSR;
-
- if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
- Reg = Src;
+ if (MI.isCopy()) {
+ Reg = MI.getOperand(1).getReg();
continue;
}
@@ -1648,15 +1648,26 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
MachineInstrBuilder MIB;
if (CalleeOp) {
// Register-indirect call.
- unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
} else {
// Direct call.
assert(GV && "Not a direct call");
- unsigned CallOpc =
- Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index cee4ad70201a..e6ebf669587d 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -8,23 +8,18 @@
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
-// virtual registers into register stack instructions. This pass uses live
+// pseudo registers into register stack instructions. This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
-// This pass is hampered by the lack of decent CFG manipulation routines for
-// machine code. In particular, this wants to be able to split critical edges
-// as necessary, traverse the machine basic block CFG in depth-first order, and
-// allow there to be multiple machine basic blocks for each LLVM basicblock
-// (needed for critical edge splitting).
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
//
-// In particular, this pass currently barfs on critical edges. Because of this,
-// it requires the instruction selector to insert FP_REG_KILL instructions on
-// the exits of any basic block that has critical edges going from it, or which
-// branch to a critical basic block.
-//
-// FIXME: this is not implemented yet. The stackifier pass only works on local
-// basic blocks.
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
//
//===----------------------------------------------------------------------===//
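
The rewritten header comment describes partitioning CFG edges into bundles: edges that leave the same block, or enter the same block, must see identical FP stack layouts, and a bundle is the transitive closure of that relation. The union-find sketch below, over hypothetical integer block and edge IDs, shows that grouping; it is not the pass's actual BlockBundle/LiveBundles machinery.

  #include <cstdio>
  #include <numeric>
  #include <utility>
  #include <vector>

  // Illustrative only: group CFG edges so that any two edges sharing a source
  // or a destination block end up in the same bundle.
  struct EdgeBundles {
    std::vector<int> Parent;                 // union-find forest over edges
    explicit EdgeBundles(unsigned NumEdges) : Parent(NumEdges) {
      std::iota(Parent.begin(), Parent.end(), 0);
    }
    int Find(int E) { return Parent[E] == E ? E : Parent[E] = Find(Parent[E]); }
    void Union(int A, int B) { Parent[Find(A)] = Find(B); }
  };

  static void BundleEdges(const std::vector<std::pair<int,int> > &Edges,
                          EdgeBundles &EB) {
    for (unsigned I = 0; I != Edges.size(); ++I)
      for (unsigned J = I + 1; J != Edges.size(); ++J)
        if (Edges[I].first == Edges[J].first ||
            Edges[I].second == Edges[J].second)
          EB.Union(I, J);
  }

  int main() {
    // Edges: 0:(1->3), 1:(2->3), 2:(2->4). Edges 0 and 1 share a destination,
    // edges 1 and 2 share a source, so all three land in one bundle.
    std::vector<std::pair<int,int> > Edges;
    Edges.push_back(std::make_pair(1, 3));
    Edges.push_back(std::make_pair(2, 3));
    Edges.push_back(std::make_pair(2, 4));
    EdgeBundles EB(Edges.size());
    BundleEdges(Edges, EB);
    std::printf("bundle of edge 2: %d\n", EB.Find(2));  // same as EB.Find(0)
  }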
@@ -32,6 +27,7 @@
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -54,7 +50,12 @@ STATISTIC(NumFP , "Number of floating point instructions");
namespace {
struct FPS : public MachineFunctionPass {
static char ID;
- FPS() : MachineFunctionPass(&ID) {}
+ FPS() : MachineFunctionPass(ID) {
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -69,11 +70,71 @@ namespace {
private:
const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // Map each MBB in the current function to an (ingoing, outgoing) index into
+ // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
+ // and are not actually stored in the map.
+ DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
MachineBasicBlock *MBB; // Current basic block
unsigned Stack[8]; // FP<n> Registers in each stack slot...
unsigned RegMap[8]; // Track which stack slot contains each register
unsigned StackTop; // The current top of the FP stack.
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
void dumpStack() const {
dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
@@ -82,27 +143,36 @@ namespace {
}
dbgs() << "\n";
}
- private:
- /// isStackEmpty - Return true if the FP stack is empty.
- bool isStackEmpty() const {
- return StackTop == 0;
- }
-
- // getSlot - Return the stack slot number a particular register number is
- // in.
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
unsigned getSlot(unsigned RegNo) const {
assert(RegNo < 8 && "Regno out of range!");
return RegMap[RegNo];
}
- // getStackEntry - Return the X86::FP<n> register in register ST(i).
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() {
+ for (int i = 7; i >= 0; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
assert(STi < StackTop && "Access past stack top!");
return Stack[StackTop-1-STi];
}
- // getSTReg - Return the X86::ST(i) register which contains the specified
- // FP<RegNo> register.
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
}
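
The helpers above maintain two parallel maps: Stack[slot] names the FP<n> register held in that slot, RegMap[FP<n>] names its slot, and st(i) is counted down from StackTop. The stand-alone model below illustrates that bookkeeping and the isLive() test; it is zero-initialized for the same reason the new FPS constructor memsets its arrays, since isLive() may read slots that were never written. All names here are hypothetical.

  #include <cassert>
  #include <cstdio>

  // Illustrative only: a tiny model of the Stack/RegMap/StackTop bookkeeping.
  struct FPStackModel {
    unsigned Stack[8], RegMap[8], StackTop;

    FPStackModel() : StackTop(0) {
      for (unsigned i = 0; i != 8; ++i) Stack[i] = RegMap[i] = 0;
    }
    void pushReg(unsigned Reg) {              // FP<Reg> becomes the new st(0)
      assert(Reg < 8 && StackTop < 8);
      Stack[StackTop] = Reg;
      RegMap[Reg] = StackTop++;
    }
    bool isLive(unsigned Reg) const {         // same test as isLive() above
      return RegMap[Reg] < StackTop && Stack[RegMap[Reg]] == Reg;
    }
    unsigned getSTIndex(unsigned Reg) const { // i such that st(i) holds FP<Reg>
      return StackTop - 1 - RegMap[Reg];
    }
  };

  int main() {
    FPStackModel M;
    M.pushReg(3);                             // FP3 -> st(0)
    M.pushReg(5);                             // FP5 -> st(0), FP3 now st(1)
    std::printf("FP3 is st(%u), FP5 live: %d\n", M.getSTIndex(3), M.isLive(5));
  }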
@@ -117,10 +187,9 @@ namespace {
bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
if (isAtTop(RegNo)) return;
-
+
unsigned STReg = getSTReg(RegNo);
unsigned RegOnTop = getStackEntry(0);
@@ -137,24 +206,37 @@ namespace {
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
- DebugLoc dl = I->getDebugLoc();
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
unsigned STReg = getSTReg(RegNo);
pushReg(AsReg); // New register on top of stack
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
- // popStackAfter - Pop the current value off of the top of the FP stack
- // after the specified instruction.
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
- // freeStackSlotAfter - Free the specified register from the register stack,
- // so that it is no longer in a register. If the register is currently at
- // the top of the stack, we just pop the current instruction, otherwise we
- // store the current top-of-stack into the specified slot, then pop the top
- // of stack.
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+ /// currently at the top of the stack, we just pop the current instruction,
+ /// otherwise we store the current top-of-stack into the specified slot,
+ /// then pop the top of stack.
void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+  /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
void handleZeroArgFP(MachineBasicBlock::iterator &I);
@@ -181,7 +263,6 @@ static unsigned getFPReg(const MachineOperand &MO) {
return Reg - X86::FP0;
}
-
/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
/// register references into FP stack references.
///
@@ -201,6 +282,10 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (!FPIsUsed) return false;
TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
StackTop = 0;
// Process the function in depth first order so that we process at least one
@@ -215,16 +300,111 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
Changed |= processBasicBlock(MF, **I);
// Process any unreachable blocks in arbitrary order now.
- if (MF.size() == Processed.size())
- return Changed;
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ BlockBundle.clear();
+ LiveBundles.clear();
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB))
- Changed |= processBasicBlock(MF, *BB);
-
return Changed;
}
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ assert(BlockBundle.empty() && "Stale data in BlockBundle");
+ SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+
+ // LiveBundle[0] is the empty live-in set.
+ LiveBundles.resize(1);
+
+ // First gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+    // Incoming bundle index.
+    unsigned &Idx = BlockBundle[MBB].first;
+    // Already assigned an incoming bundle?
+ if (Idx)
+ continue;
+ // Allocate a new LiveBundle struct for this block's live-ins.
+ const unsigned BundleIdx = Idx = LiveBundles.size();
+ DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
+ << MBB->getNumber());
+ LiveBundles.push_back(Mask);
+ LiveBundle &Bundle = LiveBundles.back();
+
+ // Make sure all predecessors have the same live-out set.
+ PropUp.insert(MBB);
+
+ // Keep pushing liveness up and down the CFG until convergence.
+ // Only critical edges cause iteration here, but when they do, multiple
+ // blocks can be assigned to the same LiveBundle index.
+ do {
+ // Assign BundleIdx as liveout from predecessors in PropUp.
+ for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
+ E = PropUp.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
+ LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
+ MachineBasicBlock *PredMBB = *LinkI;
+          // PredMBB's liveout bundle should be set to BundleIdx.
+ unsigned &Idx = BlockBundle[PredMBB].second;
+ if (Idx) {
+ assert(Idx == BundleIdx && "Inconsistent CFG");
+ continue;
+ }
+ Idx = BundleIdx;
+ DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
+ // Propagate to siblings.
+ if (PredMBB->succ_size() > 1)
+ PropDown.insert(PredMBB);
+ }
+ }
+ PropUp.clear();
+
+ // Assign BundleIdx as livein to successors in PropDown.
+ for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
+ E = PropDown.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
+ LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
+ MachineBasicBlock *SuccMBB = *LinkI;
+          // SuccMBB's livein bundle should be set to BundleIdx.
+ unsigned &Idx = BlockBundle[SuccMBB].first;
+ if (Idx) {
+ assert(Idx == BundleIdx && "Inconsistent CFG");
+ continue;
+ }
+ Idx = BundleIdx;
+ DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
+ // Propagate to siblings.
+ if (SuccMBB->pred_size() > 1)
+ PropUp.insert(SuccMBB);
+ // Also accumulate the bundle liveness mask from the liveins here.
+ Bundle.Mask |= calcLiveInMask(SuccMBB);
+ }
+ }
+ PropDown.clear();
+ } while (!PropUp.empty());
+ DEBUG({
+ dbgs() << " live:";
+ for (unsigned i = 0; i < 8; ++i)
+ if (Bundle.Mask & (1<<i))
+ dbgs() << " %FP" << i;
+ dbgs() << '\n';
+ });
+ }
+}
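A rough standalone model of the propagation loop above, stripped of the LLVM types; Block, assignBundle and the index-based CFG below are invented for the sketch. It shows how a live-in set on one block forces the same bundle index onto every block that shares an edge with it, with critical edges driving the extra iterations:

#include <cassert>
#include <cstdio>
#include <set>
#include <vector>

struct Block {
  std::vector<int> Preds, Succs; // indices into the Blocks vector
  unsigned InBundle = 0;         // live-in bundle index, 0 = unassigned
  unsigned OutBundle = 0;        // live-out bundle index, 0 = unassigned
};

// Give every block that shares an edge with Start's live-in edge set the same
// bundle index, walking up to predecessors and down to successors until no
// new block is reached (only critical edges cause extra iterations).
static void assignBundle(std::vector<Block> &Blocks, int Start,
                         unsigned BundleIdx) {
  std::set<int> PropUp{Start}, PropDown;
  do {
    for (int B : PropUp)
      for (int P : Blocks[B].Preds) {
        unsigned &Idx = Blocks[P].OutBundle;
        if (Idx) { assert(Idx == BundleIdx && "inconsistent CFG"); continue; }
        Idx = BundleIdx;
        if (Blocks[P].Succs.size() > 1)   // propagate across a critical edge
          PropDown.insert(P);
      }
    PropUp.clear();
    for (int B : PropDown)
      for (int S : Blocks[B].Succs) {
        unsigned &Idx = Blocks[S].InBundle;
        if (Idx) { assert(Idx == BundleIdx && "inconsistent CFG"); continue; }
        Idx = BundleIdx;
        if (Blocks[S].Preds.size() > 1)
          PropUp.insert(S);
      }
    PropDown.clear();
  } while (!PropUp.empty());
}

int main() {
  // 0 -> 1, 0 -> 2, 1 -> 2; the edge 0 -> 2 is critical. FP value live into 2.
  std::vector<Block> Blocks(3);
  Blocks[0].Succs = {1, 2};
  Blocks[1].Preds = {0}; Blocks[1].Succs = {2};
  Blocks[2].Preds = {0, 1};
  Blocks[2].InBundle = 1;
  assignBundle(Blocks, 2, 1);
  std::printf("out(0)=%u out(1)=%u in(1)=%u\n",
              Blocks[0].OutBundle, Blocks[1].OutBundle, Blocks[1].InBundle);
  return 0;
}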
+
/// processBasicBlock - Loop over all of the instructions in the basic block,
/// transforming FP instructions into their stack form.
///
@@ -232,10 +412,12 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
+ setupBlockStack();
+
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
uint64_t Flags = MI->getDesc().TSFlags;
-
+
unsigned FPInstClass = Flags & X86II::FPTypeMask;
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
@@ -302,10 +484,82 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
Changed = true;
}
- assert(isStackEmpty() && "Stack not empty at end of basic block?");
+ finishBlockStack();
+
return Changed;
}
+/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the fixed live-out stack expected by
+/// the successors, so that every predecessor of those successors pushes the
+/// same stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
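The division of labour between setupBlockStack and finishBlockStack can be modelled with a few lines of plain C++; LiveBundle and finishBlock below are simplified stand-ins for illustration, not the pass's data structures. The first block to finish records its stack order in the bundle, and every later block on the same edge set must shuffle to that order:

#include <cstdio>
#include <vector>

struct LiveBundle {
  unsigned Mask = 0;            // which FP registers are live across the edge
  unsigned FixCount = 0;        // number of fixed stack entries, 0 = not fixed
  unsigned char FixStack[8];    // FixStack[0] is st(0), FixStack[1] is st(1)...
  bool isFixed() const { return !Mask || FixCount; }
};

// Called when a block has finished; Stack[0..size) is the block's final
// stack, bottom first, so st(0) is the last element.
static void finishBlock(LiveBundle &B, const std::vector<unsigned> &Stack) {
  if (B.isFixed()) {
    std::puts("shuffle stack to match the already-fixed order");
    return;
  }
  B.FixCount = (unsigned)Stack.size();
  for (unsigned i = 0; i < B.FixCount; ++i)
    B.FixStack[i] = (unsigned char)Stack[Stack.size() - 1 - i]; // st(i)
  std::puts("first block out: fixing the stack order now");
}

int main() {
  LiveBundle B;
  B.Mask = 0x6;                  // FP1 and FP2 live across the edge
  finishBlock(B, {2, 1});        // first predecessor decides: st(0)=FP1, st(1)=FP2
  finishBlock(B, {1, 2});        // a later predecessor must shuffle to match
  std::printf("FixCount=%u st(0)=FP%u st(1)=FP%u\n", B.FixCount,
              (unsigned)B.FixStack[0], (unsigned)B.FixStack[1]);
  return 0;
}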
+
+
//===----------------------------------------------------------------------===//
// Efficient Lookup Table Support
//===----------------------------------------------------------------------===//
@@ -318,7 +572,7 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool operator<(unsigned V, const TableEntry &TE) {
+ friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
return V < TE.from;
}
};
@@ -597,6 +851,13 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
// Otherwise, store the top of stack into the dead slot, killing the operand
// without having to add in an explicit xchg then pop.
//
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
unsigned STReg = getSTReg(FPRegNo);
unsigned OldSlot = getSlot(FPRegNo);
unsigned TopReg = Stack[StackTop-1];
@@ -604,9 +865,90 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
RegMap[TopReg] = OldSlot;
RegMap[FPRegNo] = ~0;
Stack[--StackTop] = ~0;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
- I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg);
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ for (;;) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
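The mask arithmetic above can be exercised on its own. The sketch below (plain integers, invented names, no MachineBasicBlock or instructions) computes the same kill/def split directly from the two masks and walks the same three phases: pair kills with defs as renames, pop the leftover kills, then materialize the leftover defs:

#include <cstdio>

// Index of the lowest set bit; callers guarantee V is non-zero.
static unsigned lowestSetBit(unsigned V) {
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

int main() {
  unsigned Live = 0x05;          // FP0 and FP2 are currently on the stack
  unsigned Mask = 0x16;          // we want FP1, FP2 and FP4 live instead
  unsigned Defs = Mask & ~Live;  // registers that must come into existence
  unsigned Kills = Live & ~Mask; // registers that must disappear

  // Pair kills with defs: renaming a killed register gives the def for free.
  while (Kills && Defs) {
    unsigned KReg = lowestSetBit(Kills), DReg = lowestSetBit(Defs);
    std::printf("rename FP%u as implicitly-defined FP%u\n", KReg, DReg);
    Kills &= ~(1u << KReg);
    Defs &= ~(1u << DReg);
  }
  while (Kills) {                // leftover kills are popped or stored away
    unsigned KReg = lowestSetBit(Kills);
    std::printf("kill FP%u\n", KReg);
    Kills &= ~(1u << KReg);
  }
  while (Defs) {                 // leftover defs are materialized, e.g. as zero
    unsigned DReg = lowestSetBit(Defs);
    std::printf("define FP%u as 0\n", DReg);
    Defs &= ~(1u << DReg);
  }
  return 0;
}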
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
}
@@ -660,7 +1002,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_Fp32m80 ||
MI->getOpcode() == X86::ISTT_Fp64m80 ||
MI->getOpcode() == X86::ST_FpP80m)) {
- duplicateToTop(Reg, 7 /*temp register*/, I);
+ duplicateToTop(Reg, getScratchReg(), I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
@@ -1013,8 +1355,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
if (!MI->killsRegister(X86::FP0 + Op0)) {
// Duplicate Op0 into a temporary on the stack top.
- // This actually assumes that FP7 is dead.
- duplicateToTop(Op0, 7, I);
+ duplicateToTop(Op0, getScratchReg(), I);
} else {
// Op0 is killed, so just swap it into position.
moveToTop(Op0, I);
@@ -1034,8 +1375,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
++StackTop;
unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Assume FP6 is not live, use it as a scratch register.
- duplicateToTop(Op0, 6, I);
+ duplicateToTop(Op0, getScratchReg(), I);
moveToTop(RegOnTop, I);
} else if (getSTReg(Op0) != X86::ST1) {
// We have the wrong value at st(1). Shuffle! Untested!
@@ -1119,11 +1459,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::RETI:
// If RET has an FP register use operand, pass the first one in ST(0) and
// the second one in ST(1).
- if (isStackEmpty()) return; // Quick check to see if any are possible.
-
+
// Find the register operands.
unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
-
+ unsigned LiveMask = 0;
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
@@ -1142,12 +1482,18 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
assert(SecondFPRegOp == ~0U && "More than two fp operands!");
SecondFPRegOp = getFPReg(Op);
}
+ LiveMask |= (1 << getFPReg(Op));
// Remove the operand so that later passes don't see it.
MI->RemoveOperand(i);
--i, --e;
}
-
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
// There are only four possibilities here:
// 1) we are returning a single FP value. In this case, it has to be in
// ST(0) already, so just declare success by removing the value from the
@@ -1173,7 +1519,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
- unsigned NewReg = (FirstFPRegOp+1)%7;
+ unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
@@ -1197,7 +1543,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
I = MBB->erase(I); // Remove the pseudo instruction
- --I;
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
}
// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
deleted file mode 100644
index 2c98b96c510b..000000000000
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- X86FloatingPoint.cpp - FP_REG_KILL inserter -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the pass which inserts FP_REG_KILL instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "x86-codegen"
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added");
-
-namespace {
- struct FPRegKiller : public MachineFunctionPass {
- static char ID;
- FPRegKiller() : MachineFunctionPass(&ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "X86 FP_REG_KILL inserter";
- }
- };
- char FPRegKiller::ID = 0;
-}
-
-FunctionPass *llvm::createX87FPRegKillInserterPass() {
- return new FPRegKiller();
-}
-
-/// isFPStackVReg - Return true if the specified vreg is from a fp stack
-/// register class.
-static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(RegNo))
- return false;
-
- switch (MRI.getRegClass(RegNo)->getID()) {
- default: return false;
- case X86::RFP32RegClassID:
- case X86::RFP64RegClassID:
- case X86::RFP80RegClassID:
- return true;
- }
-}
-
-
-/// ContainsFPStackCode - Return true if the specific MBB has floating point
-/// stack code, and thus needs an FP_REG_KILL.
-static bool ContainsFPStackCode(MachineBasicBlock *MBB,
- const MachineRegisterInfo &MRI) {
- // Scan the block, looking for instructions that define or use fp stack vregs.
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (!I->getOperand(op).isReg())
- continue;
- if (unsigned Reg = I->getOperand(op).getReg())
- if (isFPStackVReg(Reg, MRI))
- return true;
- }
- }
-
- // Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block, which is a definition of the
- // value.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++ SI) {
- MachineBasicBlock *SuccBB = *SI;
- for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
- I != E; ++I) {
- // All PHI nodes are at the top of the block.
- if (!I->isPHI()) break;
-
- if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
- return true;
- }
- }
-
- return false;
-}
-
-bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
- // If we are emitting FP stack code, scan the basic block to determine if this
- // block defines or uses any FP values. If so, put an FP_REG_KILL instruction
- // before the terminator of the block.
-
- // Note that FP stack instructions are used in all modes for long double,
- // so we always need to do this check.
- // Also note that it's possible for an FP stack register to be live across
- // an instruction that produces multiple basic blocks (SSE CMOV) so we
- // must check all the generated basic blocks.
-
- // Scan all of the machine instructions in these MBBs, checking for FP
- // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
-
- // Fast-path: If nothing is using the x87 registers, we don't need to do
- // any scanning.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
- MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
- MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
- return false;
-
- bool Changed = false;
- MachineFunction::iterator MBBI = MF.begin();
- MachineFunction::iterator EndMBB = MF.end();
- for (; MBBI != EndMBB; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
-
- // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
- // before the return.
- if (!MBB->empty()) {
- MachineBasicBlock::iterator EndI = MBB->end();
- --EndI;
- if (EndI->getDesc().isReturn())
- continue;
- }
-
- // If we find any FP stack code, emit the FP_REG_KILL instruction.
- if (ContainsFPStackCode(MBB, MRI)) {
- BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
- MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
- ++NumFPKill;
- Changed = true;
- }
- }
-
- return Changed;
-}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 72f2bc11d7cc..c5234413aba6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -171,6 +171,17 @@ namespace {
virtual void PreprocessISelDAG();
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
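Both predicates just ask whether a constant survives a round trip through a narrower signed type, which is what decides between the mi8/mi32 and full-width immediate forms used below. A standalone version of the same checks (fitsSExt8 and fitsSExt32 are illustrative names, not LLVM APIs):

#include <cstdint>
#include <cstdio>

static bool fitsSExt8(int64_t V)   { return V == (int64_t)(int8_t)V; }
static bool fitsSExt32(uint64_t V) { return (int64_t)V == (int64_t)(int32_t)V; }

int main() {
  std::printf("%d %d\n", fitsSExt8(-128), fitsSExt8(128));         // 1 0
  std::printf("%d %d\n", fitsSExt32(-1), fitsSExt32(1ULL << 31));  // 1 0
  return 0;
}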
+
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -1312,13 +1323,6 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-static SDNode *FindCallStartFromCall(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCall(Node->getOperand(0).getNode());
-}
-
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
@@ -1403,7 +1407,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC16m;
else if (isSub) {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB16mi8;
else
Opc = X86::LOCK_SUB16mi;
@@ -1411,7 +1415,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB16mr;
} else {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD16mi8;
else
Opc = X86::LOCK_ADD16mi;
@@ -1426,7 +1430,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC32m;
else if (isSub) {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB32mi8;
else
Opc = X86::LOCK_SUB32mi;
@@ -1434,7 +1438,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB32mr;
} else {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD32mi8;
else
Opc = X86::LOCK_ADD32mi;
@@ -1450,17 +1454,17 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
else if (isSub) {
Opc = X86::LOCK_SUB64mr;
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_SUB64mi32;
}
} else {
Opc = X86::LOCK_ADD64mr;
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_ADD64mi32;
}
}
@@ -1841,7 +1845,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
- if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
// Look past the truncate if CMP is the only use of it.
N0 = N0.getOperand(0);
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b3c48862898f..95dbb6176687 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
+#include "X86ShuffleDecode.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
@@ -343,8 +344,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- if (!Subtarget->hasSSE2())
- setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
@@ -837,6 +839,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
    // i8 and i16 vectors are custom, because the source register and source
    // memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -866,6 +872,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
@@ -877,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
- //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
//setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
//setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
//setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
@@ -1189,6 +1196,50 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = (Subtarget->is64Bit()
+ ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ break;
+ case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64:
+ RRC = X86::VR64RegisterClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = X86::VR128RegisterClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+unsigned
+X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 8 - FPDiff;
+ case X86::VR128RegClassID:
+ return Subtarget->is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
@@ -1259,6 +1310,19 @@ X86TargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1276,14 +1340,20 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- EVT ValVT = ValToCopy.getValueType();
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
- if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
- ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasSSE2())
+ ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+ }
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
@@ -1570,6 +1640,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = X86::FR32RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
@@ -1937,6 +2009,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && Subtarget->isTargetWin64()) {
+ // Win64 ABI requires argument XMM reg to be copied to the corresponding
+ // shadow reg if callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
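The rule implemented just above copies each of the first four XMM argument registers into its fixed integer counterpart when calling a Win64 varargs function. A toy lookup of that mapping, with register names as plain strings purely for illustration:

#include <cstdio>

static const char *shadowRegFor(unsigned XmmNo) {
  static const char *const Shadow[] = {"RCX", "RDX", "R8", "R9"};
  return XmmNo < 4 ? Shadow[XmmNo] : nullptr; // only the first four have shadows
}

int main() {
  for (unsigned i = 0; i < 5; ++i) {
    const char *GPR = shadowRegFor(i);
    std::printf("XMM%u -> %s\n", i, GPR ? GPR : "none");
  }
  return 0;
}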
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
@@ -1990,7 +2075,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
}
- if (Is64Bit && isVarArg) {
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -1999,7 +2084,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    // the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
- // FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -2165,8 +2249,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (!isTailCall && Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
- // Add an implicit use of AL for x86 vararg functions.
- if (Is64Bit && isVarArg)
+ // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
@@ -2356,8 +2440,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (RegInfo->needsStackRealignment(MF))
return false;
- // Do not sibcall optimize vararg calls unless the call site is not passing any
- // arguments.
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
if (isVarArg && !Outs.empty())
return false;
@@ -2493,6 +2577,112 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return true;
+ }
+ return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+ return SDValue();
+}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -3347,18 +3537,27 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 64) { // MMX
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- } else if (HasSSE2) { // SSE2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- } else { // SSE1
+ } else if (VT.getSizeInBits() == 128) {
+ if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.getSizeInBits() == 256) { // AVX
+ // 256-bit logic and arithmetic instructions in AVX are
+ // all floating-point, no support for integer ops. Default
+ // to emitting fp zeroed vectors then.
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
@@ -3372,9 +3571,9 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 64) // MMX
+ if (VT.getSizeInBits() == 64) // MMX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else // SSE
+ else // SSE
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
@@ -3439,9 +3638,8 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
- bool HasSSE2) {
+/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
if (SV->getValueType(0).getVectorNumElements() <= 4)
return SDValue(SV, 0);
@@ -3488,68 +3686,253 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
-/// getNumOfConsecutiveZeros - Return the number of elements in a result of
-/// a shuffle that is zero.
-static
-unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
- bool Low, SelectionDAG &DAG) {
- unsigned NumZeros = 0;
- for (int i = 0; i < NumElems; ++i) {
- unsigned Index = Low ? i : NumElems-i-1;
- int Idx = SVOp->getMaskElt(Index);
- if (Idx < 0) {
- ++NumZeros;
- continue;
- }
- SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
- if (Elt.getNode() && X86::isZeroNode(Elt))
- ++NumZeros;
- else
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ Index = SV->getMaskElt(Index);
+
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ int NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ int NumElems = VT.getVectorNumElements();
+ SmallVector<unsigned, 16> ShuffleMask;
+ SDValue ImmN;
+
+ switch(Opcode) {
+ case X86ISD::SHUFPS:
+ case X86ISD::SHUFPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPSMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ DecodePUNPCKHMask(NumElems, ShuffleMask);
break;
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ DecodePUNPCKLMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ DecodeUNPCKLPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PSHUFD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // The index 0 always comes from the first element of the second source,
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the other positions of the first source vector.
+ unsigned OpNum = (Index == 0) ? 1 : 0;
+ return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+ Depth+1);
+ }
+ default:
+      assert(0 && "not implemented for target shuffle node");
+ return SDValue();
+ }
+
+ Index = ShuffleMask[Index];
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ Depth+1);
}
- return NumZeros;
-}
-/// isVectorShift - Returns true if the shuffle can be implemented as a
-/// logical left or right shift of a vector.
-/// FIXME: split into pslldqi, psrldqi, palignr variants.
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
- bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
- isLeft = true;
- unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
- if (!NumZeros) {
- isLeft = false;
- NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
- if (!NumZeros)
- return false;
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation that are consecutively zero, scanning from one end. The
+/// search can start in two different directions, from left or right.
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ int i = 0;
+
+ while (i < NumElems) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ ++i;
}
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also set OpNum to the source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+ int OpIdx, int NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (unsigned i = NumZeros; i < NumElems; ++i) {
- unsigned Val = isLeft ? (i - NumZeros) : i;
- int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
- if (Idx_ < 0)
+
+ for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+    // Ignore undef indices
+ if (Idx < 0)
continue;
- unsigned Idx = (unsigned) Idx_;
+
if (Idx < NumElems)
SeenV1 = true;
- else {
- Idx -= NumElems;
+ else
SeenV2 = true;
- }
- if (Idx != Val)
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
return false;
}
- if (SeenV1 && SeenV2)
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical right shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros-1, // Mask End Index
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems-1, // Mask End Index
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
return false;
- ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+ isLeft = true;
ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
return true;
}
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
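Taken together, the three helpers above answer one question: is the shuffle result a run of zeros on one end plus consecutive elements of a single source? The sketch below makes that decision over a plain mask array, with negative entries marking elements known to be zero or undef; it is a simplified single-source model, not the two-operand SDNode logic:

#include <cstdio>
#include <vector>

// Decide whether Mask describes a whole-vector shift of one NumElems-wide
// source. ShiftLeft mirrors the isLeft flag above: true when the zeros fill
// the low lanes and the surviving elements start from source element 0.
static bool isVectorShiftMask(const std::vector<int> &Mask, bool &ShiftLeft,
                              unsigned &ShAmt) {
  unsigned NumElems = (unsigned)Mask.size();
  for (int ZerosFromLeft = 0; ZerosFromLeft < 2; ++ZerosFromLeft) {
    // Count the run of zero elements on one end of the result.
    unsigned Zeros = 0;
    while (Zeros < NumElems &&
           Mask[ZerosFromLeft ? Zeros : NumElems - Zeros - 1] < 0)
      ++Zeros;
    if (Zeros == 0 || Zeros == NumElems)
      continue;
    // The rest must be consecutive source elements: starting at element Zeros
    // when the zeros sit on the right, at element 0 when they sit on the left.
    unsigned Begin = ZerosFromLeft ? Zeros : 0;
    unsigned Src   = ZerosFromLeft ? 0 : Zeros;
    bool Consecutive = true;
    for (unsigned i = 0; i < NumElems - Zeros; ++i)
      if (Mask[Begin + i] >= 0 && Mask[Begin + i] != (int)(Src + i)) {
        Consecutive = false;
        break;
      }
    if (Consecutive) {
      ShiftLeft = ZerosFromLeft != 0;
      ShAmt = Zeros;
      return true;
    }
  }
  return false;
}

int main() {
  bool Left = false;
  unsigned Amt = 0;
  std::vector<int> M = {1, 2, 3, -1};        // <src1, src2, src3, zero>
  if (isVectorShiftMask(M, Left, Amt))
    std::printf("%s shift by %u element(s)\n", Left ? "left" : "right", Amt);
  return 0;
}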
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
@@ -3779,9 +4162,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // All zero's are handled with pxor, all one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllZeros(Op.getNode())
- || ISD::isBuildVectorAllOnes(Op.getNode())) {
+  // All zeros are handled with pxor in SSE2 and above, xorps in SSE1.
+  // All ones are handled with pcmpeqd. In AVX, zeros are handled with
+  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256-bit version of
+  // pcmpeqd is present, so AllOnes is ignored.
+ if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+ (Op.getValueType().getSizeInBits() != 256 &&
+ ISD::isBuildVectorAllOnes(Op.getNode()))) {
// Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
@@ -3819,10 +4206,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (NumNonZero == 0) {
- // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
return DAG.getUNDEF(VT);
- }
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
@@ -3960,7 +4346,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ *this);
if (V.getNode()) return V;
}
@@ -4014,28 +4400,51 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (LD.getNode())
return LD;
- // For SSE 4.1, use inserts into undef.
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
- V[0] = DAG.getUNDEF(VT);
- for (unsigned i = 0; i < NumElems; ++i)
- if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
- V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i));
- return V[0];
+ }
+ return Result;
}
- // Otherwise, expand into a number of unpckl*
- // e.g. for v4f32
+ // Otherwise, expand into a number of unpckl*, start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
- NumElems >>= 1;
- while (NumElems != 0) {
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
- NumElems >>= 1;
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+ // If V[i+EltStride] is undef and this is the first round of mixing,
+ // then it is safe to just drop this shuffle: V[i] is already in the
+ // right place, the one element (since it's the first round) being
+ // inserted as undef can be dropped. This isn't safe for successive
+ // rounds because they will permute elements within both vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
}
return V[0];
}
@@ -4074,10 +4483,10 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
// 2. [ssse3] 1 x pshufb
// 3. [ssse3] 2 x pshufb + 1 x por
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-static
-SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const X86TargetLowering &TLI) {
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -4128,7 +4537,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads.find_first();
BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4187,15 +4596,21 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+ X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
}
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -4262,6 +4677,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
MaskV.push_back(i);
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+ DAG);
}
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
@@ -4284,6 +4705,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+ DAG);
}
// In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -4473,7 +4900,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
@@ -4697,6 +5124,129 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
+static bool MayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (MayFoldLoad(V))
+ return true;
+ return false;
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasSSE2 && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+  // potential load folding here, otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  // movl and movlp will both match v2i64, but v2i64 is never matched by
+  // movl earlier because we keep it strict to avoid messing with the movlp
+  // load folding logic (see the code above the getMOVLP call). Match it here
+  // instead; this is horrible, but it will stay this way until we move all
+  // shuffle matching to x86-specific nodes. Note that for the 1st condition
+  // all types are matched with movsd.
+ if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ else if (HasSSE2)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ default:
+ llvm_unreachable("Unknow type for unpckl");
+ }
+ return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ default:
+ llvm_unreachable("Unknow type for unpckh");
+ }
+ return 0;
+}
+
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -4710,6 +5260,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
@@ -4718,7 +5272,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (SVOp->isSplat()) {
if (isMMX || NumElems < 4)
return Op;
- return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+ return PromoteSplat(SVOp, DAG);
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
@@ -4746,8 +5300,35 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (X86::isPSHUFDMask(SVOp))
- return Op;
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isPSHUFDMask(SVOp)) {
+    // The mask check in the if above can match several different instructions
+    // during isel, not only pshufd as its name suggests. Sad but true; emulate
+    // that behavior for now...
+ if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+ TargetMask, DAG);
+
+ if (VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
// Check if this can be converted into a logical shift.
bool isLeft = false;
@@ -4768,17 +5349,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMMX)
- return Op;
+ if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
}
// FIXME: fold these into legal mask.
- if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
- X86::isMOVSLDUPMask(SVOp) ||
- X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVLHPSMask(SVOp) ||
- X86::isMOVLPMask(SVOp)))
- return Op;
+ if (!isMMX) {
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
+ }
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -4818,11 +5414,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
- X86::isUNPCKH_v_undef_Mask(SVOp) ||
- X86::isUNPCKLMask(SVOp) ||
- X86::isUNPCKHMask(SVOp))
- return Op;
+ if (X86::isUNPCKLMask(SVOp))
+ return (isMMX) ?
+ Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (X86::isUNPCKHMask(SVOp))
+ return (isMMX) ?
+ Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -4844,11 +5442,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// FIXME: this seems wrong.
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKLMask(NewSVOp) ||
- X86::isUNPCKHMask(NewSVOp))
- return NewOp;
+
+ if (X86::isUNPCKLMask(NewSVOp))
+ return (isMMX) ?
+ NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+
+ if (X86::isUNPCKHMask(NewSVOp))
+ return (isMMX) ?
+ NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
}
// FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
@@ -4857,15 +5458,52 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
return CommuteVectorShuffle(SVOp, DAG);
- // Check for legal shuffle and return?
- SmallVector<int, 16> PermMask;
- SVOp->getMask(PermMask);
- if (isShuffleMaskLegal(PermMask, VT))
+ // The checks below are all present in isShuffleMaskLegal, but they are
+ // inlined here right now to enable us to directly emit target specific
+  // nodes, and remove them one by one until they don't return Op anymore.
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+
+ // FIXME: pshufb, blends, shifts.
+ if (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
return Op;
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT)) {
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (VT == MVT::v4f32 || VT == MVT::v4i32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+ TargetMask, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+ TargetMask, DAG);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
if (NewOp.getNode())
return NewOp;
}
@@ -6922,24 +7560,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // ptest intrinsics. The intrinsic these come from are designed to return
- // an integer value, not just an instruction so lower it to the ptest
- // pattern and a setcc for the result.
+  // ptest and testp intrinsics. The intrinsics these come from are designed to
+  // return an integer value, not just an instruction, so lower it to the ptest
+  // or testp pattern and a setcc for the result.
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
- case Intrinsic::x86_sse41_ptestnzc:{
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
unsigned X86CC = 0;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
@@ -6947,7 +7619,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
@@ -7110,12 +7783,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Handler = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
- getPointerTy());
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
- SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(-TD->getPointerSize()));
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
@@ -7218,7 +7892,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- report_fatal_error("Nest register in use - reduce number of inreg parameters!");
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
}
}
break;
@@ -7439,6 +8114,86 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
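+// LowerSHL - Custom lowering for vector SHL; handles v4i32 and v16i8 with
+// per-element shift amounts.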
+SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+
+ LLVMContext *Context = DAG.getContext();
+
+ assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later");
+
+ if (VT == MVT::v4i32) {
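+    // Build 2^amt per lane: move the shift amounts into the float exponent
+    // field (<< 23), add the exponent bias 0x3f800000 (1.0f), bitcast to
+    // float, convert back to integer, and multiply by the original value.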
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
+
+ std::vector<Constant*> CV(4, CI);
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8) {
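+    // Per-byte variable shift: move the 3 shift-amount bits so the highest
+    // one sits in the byte's sign bit (<< 5), then use pblendvb to
+    // conditionally shift by 4, 2 and 1, doubling the amount (a += a) after
+    // each step so the next bit moves into the sign-bit position.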
+ // a = a << 5;
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+
+ ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
+ ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+
+ std::vector<Constant*> CVM1(16, CM1);
+ std::vector<Constant*> CVM2(16, CM2);
+ Constant *C = ConstantVector::get(CVM1);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)15, 4), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(4, MVT::i32));
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ C = ConstantVector::get(CVM2);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(2, MVT::i32));
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ // return pblendv(r, r+r, a);
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ return R;
+ }
+ return SDValue();
+}
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
@@ -7508,6 +8263,50 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return Sum;
}
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->hasSSE2()) {
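+    // Without SSE2 there is no mfence; a locked "or" of zero to (%esp) is
+    // used as a full memory barrier instead.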
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0,
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -7597,6 +8396,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -7640,6 +8440,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SHL: return LowerSHL(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -7852,6 +8653,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
+ case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
+ case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
+ case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
+ case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
+ case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
+ case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
+ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
+ case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
+ case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
+ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
+ case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
+ case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
}
@@ -7863,6 +8698,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
@@ -7882,7 +8718,8 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
- if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
@@ -8368,19 +9205,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
-// all of this code can be replaced with that in the .td file.
+// or XMM0_V32I8 in AVX, all of this code can be replaced with that
+// in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
+ assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ "Target must have SSE4.2 or AVX features enabled");
+
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
unsigned Opc;
- if (memArg)
- Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
- else
- Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+
+ if (!Subtarget->hasAVX()) {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+ } else {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ }
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
@@ -8562,7 +9411,8 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::ESP, RegState::Define | RegState::Implicit);
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -8579,6 +9429,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
+ bool IsWin64 = Subtarget->isTargetWin64();
assert(MI->getOperand(3).isGlobal() && "This should be a global");
@@ -8590,7 +9441,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -8727,12 +9578,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
// Atomic Lowering.
@@ -8966,21 +9821,20 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (VT.getSizeInBits() != 128)
return SDValue();
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
-
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
-/// PerformShuffleCombine - Detect vector gather/scatter index generation
-/// and convert it from being a bunch of shuffles and extracts to a simple
-/// store and scalar loads to extract the elements.
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue InputVector = N->getOperand(0);
@@ -9030,8 +9884,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
+ 0, false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9045,11 +9899,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr);
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+ OffsetVal, StackPtr);
// Load the scalar.
- SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
- NULL, 0, false, false, 0);
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, NULL, 0, false, false, 0);
// Replace the exact with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -9087,8 +9942,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
@@ -9126,8 +9980,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
@@ -9156,8 +10009,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
@@ -9182,8 +10034,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
@@ -9193,8 +10044,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
@@ -9905,7 +10755,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
@@ -9922,6 +10771,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SHUFPS: // Handle all target specific shuffles
+ case X86ISD::SHUFPD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
}
return SDValue();
@@ -9956,14 +10827,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
-static bool MayFoldLoad(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
-}
-
-static bool MayFoldIntoStore(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
-}
-
/// IsDesirableToPromoteOp - This method queries the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4e4daa4bc5ca..d2d9b28a0396 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -248,6 +248,44 @@ namespace llvm {
// PTEST - Vector bitwise comparisons
PTEST,
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ PSHUFHW_LD,
+ PSHUFLW_LD,
+ SHUFPD,
+ SHUFPS,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVSHDUP_LD,
+ MOVSLDUP_LD,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVHLPD,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKLPS,
+ UNPCKLPD,
+ UNPCKHPS,
+ UNPCKHPD,
+ PUNPCKLBW,
+ PUNPCKLWD,
+ PUNPCKLDQ,
+ PUNPCKLQDQ,
+ PUNPCKHBW,
+ PUNPCKHWD,
+ PUNPCKHDQ,
+ PUNPCKHQDQ,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -265,7 +303,13 @@ namespace llvm {
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
- ATOMSWAP64_DAG
+ ATOMSWAP64_DAG,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -584,12 +628,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
/// appropriate.
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -710,11 +761,16 @@ namespace llvm {
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE
+ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 42d0e7f9778a..0884b61425e9 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -73,11 +73,7 @@ def GetLo32XForm : SDNodeXForm<imm, [{
return getI32Imm((unsigned)N->getZExtValue());
}]>;
-def i64immSExt32 : PatLeaf<(i64 imm), [{
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
def i64immZExt32 : PatLeaf<(i64 imm), [{
@@ -158,7 +154,7 @@ let isCall = 1 in
// FIXME: We need to teach codegen about single list of call-clobbered
// registers.
-let isCall = 1 in
+let isCall = 1, isCodeGenOnly = 1 in
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
@@ -168,7 +164,7 @@ let isCall = 1 in
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
Uses = [RSP] in {
- def WINCALL64pcrel32 : I<0xE8, RawFrm,
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
Requires<[IsWin64]>;
@@ -182,7 +178,8 @@ let isCall = 1 in
}
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
@@ -216,9 +213,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp{q}\t$dst", []>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>;
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>;
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
"ljmp{q}\t{*}$dst", []>;
}
@@ -246,7 +243,7 @@ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>;
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
def POP64r : I<0x58, AddRegFrm,
@@ -330,7 +327,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
// Fast system-call instructions
def SYSEXIT64 : RI<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB;
+ (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Move Instructions...
@@ -374,6 +371,7 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store i64immSExt32:$src, addr:$dst)]>;
/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
@@ -388,7 +386,13 @@ let mayStore = 1 in
def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[]>;
+}
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{q}\t{$src, %rax|%rax, $src}", []>;
def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@@ -397,6 +401,7 @@ def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{q}\t{%rax, $dst|$dst, %rax}", []>;
def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
"mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
// Moves to and from segment registers
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
@@ -1316,14 +1321,13 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
[]
>, TB;
-def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
- REX_W;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
-def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))]>, TB;
@@ -1537,116 +1541,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
//===----------------------------------------------------------------------===//
-// Conversion Instructions...
-//
-
-// f64 -> signed i64
-def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
-def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
-def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvtsd2si64 VR128:$src))]>;
-def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst),
- (ins f128mem:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_sse2_cvtsd2si64
- (load addr:$src)))]>;
-def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
-def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvttsd2si64 VR128:$src))]>;
-def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst),
- (ins f128mem:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvttsd2si64
- (load addr:$src)))]>;
-
-// Signed i64 -> f64
-def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
-def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtsi642sd VR128:$src1,
- GR64:$src2))]>;
-def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtsi642sd VR128:$src1,
- (loadi64 addr:$src2)))]>;
-} // Constraints = "$src1 = $dst"
-
-// Signed i64 -> f32
-def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
-def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
- def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- GR64:$src2))]>;
- def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- (loadi64 addr:$src2)))]>;
-} // Constraints = "$src1 = $dst"
-
-// f32 -> signed i64
-def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
-def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvtss2si64 VR128:$src))]>;
-def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_sse_cvtss2si64
- (load addr:$src)))]>;
-def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvttss2si64 VR128:$src))]>;
-def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst),
- (ins f32mem:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvttss2si64 (load addr:$src)))]>;
-
// Descriptor-table support instructions
// LLDT is not interpreted specially in 64-bit mode because there is no sign
@@ -1726,6 +1620,14 @@ def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
// Atomic Instructions
//===----------------------------------------------------------------------===//
+// TODO: Get this to fold the constant into the instruction.
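+// A locked "or" of zero to (%rsp) acts as a full memory barrier when mfence
+// cannot be used.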
+let hasSideEffects = 1, Defs = [ESP] in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
let Defs = [RAX, EFLAGS], Uses = [RAX] in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
@@ -1772,7 +1674,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
// Optimized codegen when the non-memory output is not used.
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"lock\n\t"
"add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 000000000000..d868773d2d69
--- /dev/null
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,60 @@
+//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
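+// fma_rm defines register/register and register/memory forms of one FMA3
+// opcode for both 128-bit (VR128) and 256-bit (VR256) operands; fma_forms
+// instantiates it for the 132, 213 and 231 operand orderings.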
+multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Fused Multiply-Add
+ defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+
+ // Fused Negative Multiply-Add
+ defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index da93de988d50..9c9bcc7d0b6a 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -108,10 +108,6 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
-let isTerminator = 1 in
- let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
- def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>;
-
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32"
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
@@ -157,7 +153,7 @@ def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
}
-// FpIf32, FpIf64 - Floating Point Psuedo Instruction template.
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cc3fdf1efd7b..79187e9a76d7 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -39,6 +39,7 @@ def MRM_E8 : Format<39>;
def MRM_F0 : Format<40>;
def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
+def RawFrmImm16 : Format<43>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -210,7 +211,7 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
-// FpI_ - Floating Point Psuedo Instruction template. Not Predicated.
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
let FPForm = fp;
@@ -224,13 +225,13 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -411,6 +412,20 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasSSE42]>;
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefix.
+// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
// AES Instruction Templates:
//
// AES8I
@@ -425,6 +440,18 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasAES]>;
+// CLMUL Instruction Templates
+class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ OpSize, VEX_4V, Requires<[HasFMA3]>;
+
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 71c4e8bc147f..01149b699213 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -117,9 +117,67 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, v4f32>,
- SDTCisVT<2, v4f32>]>;
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
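+// SDTShuff1Op/SDTShuff2Op describe shuffles of one or two vector operands;
+// the *OpI profiles add an immediate shuffle mask, and SDTShuff2OpLdI takes
+// a pointer operand for the load-folding forms.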
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
+def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -148,12 +206,13 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
+// 128-bit load pattern fragments
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit load pattern fragments
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
@@ -174,6 +233,8 @@ def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
@@ -183,7 +244,7 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr),
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit aligned load pattern fragments
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
@@ -206,15 +267,20 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
@@ -254,6 +320,7 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -261,6 +328,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ce471eadd78b..5280940cf437 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -235,6 +235,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::BT64ri8, X86::BT64mi8, 1, 0 },
{ X86::CALL32r, X86::CALL32m, 1, 0 },
{ X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
{ X86::CMP16ri, X86::CMP16mi, 1, 0 },
{ X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
{ X86::CMP16rr, X86::CMP16mr, 1, 0 },
@@ -667,46 +668,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}
-bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case X86::MOV8rr:
- case X86::MOV8rr_NOREX:
- case X86::MOV16rr:
- case X86::MOV32rr:
- case X86::MOV64rr:
- case X86::MOV32rr_TC:
- case X86::MOV64rr_TC:
-
- // FP Stack register class copies
- case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
- case X86::MOV_Fp3264: case X86::MOV_Fp3280:
- case X86::MOV_Fp6432: case X86::MOV_Fp8032:
-
- // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
- // copies are done with FsMOVAPSrr and FsMOVAPDrr.
-
- case X86::FsMOVAPSrr:
- case X86::FsMOVAPDrr:
- case X86::MOVAPSrr:
- case X86::MOVAPDrr:
- case X86::MOVDQArr:
- case X86::MMX_MOVQ64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
-}
-
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -827,7 +788,7 @@ static bool isFrameStoreOpcode(int Opcode) {
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 1, FrameIndex))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
return MI->getOperand(0).getReg();
return 0;
}
@@ -866,7 +827,8 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 0, FrameIndex))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86::AddrNumOperands).getReg();
return 0;
}
@@ -1664,14 +1626,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
-static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
- const X86InstrInfo &TII) {
- if (MI->getOpcode() == X86::FP_REG_KILL)
- return false;
- return TII.isUnpredicatedTerminator(MI);
-}
-
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -1688,7 +1642,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
- if (!isBrAnalysisUnpredicatedTerminator(I, *this))
+ if (!isUnpredicatedTerminator(I))
break;
// A terminator that isn't a branch can't easily be handled by this
@@ -1891,6 +1845,33 @@ static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
+// Try and copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ return 0;
+}
+
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -1915,6 +1896,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = X86::MOVAPSrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -2046,6 +2029,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2130,8 +2115,9 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- &X86::VR128RegClass, &RI);
+ RC, &RI);
}
}
@@ -2161,8 +2147,9 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- &X86::VR128RegClass, &RI);
+ RC, &RI);
}
}
return true;
@@ -2423,10 +2410,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -2453,12 +2447,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
} else if (Ops.size() != 1)
return NULL;
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
@@ -2485,10 +2489,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
- if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
@@ -2991,561 +2998,6 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
return false;
}
-
-/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
- unsigned REX = 0;
- const TargetInstrDesc &Desc = MI.getDesc();
-
- // Pseudo instructions do not need REX prefix byte.
- if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
- if (Desc.TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
- unsigned NumOps = Desc.getNumOperands();
- if (NumOps) {
- bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- unsigned i = isTwoAddr ? 1 : 0;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (isX86_64NonExtLowByteReg(Reg))
- REX |= 0x40;
- }
- }
-
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg:
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= (1 << 0) | (1 << 2);
- break;
- case X86II::MRMSrcReg: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 0;
- }
- break;
- }
- case X86II::MRMSrcMem: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- i = isTwoAddr ? 2 : 1;
- for (; i != NumOps; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
- i = isTwoAddr ? 1 : 0;
- if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- for (; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- default: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 0;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 2;
- }
- break;
- }
- }
- }
- return REX;
-}
-
-/// sizePCRelativeBlockAddress - This method returns the size of a PC
-/// relative block address instruction
-///
-static unsigned sizePCRelativeBlockAddress() {
- return 4;
-}
-
-/// sizeGlobalAddress - Give the size of the emission of this global address
-///
-static unsigned sizeGlobalAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeConstPoolAddress - Give the size of the emission of this constant
-/// pool address
-///
-static unsigned sizeConstPoolAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeExternalSymbolAddress - Give the size of the emission of this external
-/// symbol
-///
-static unsigned sizeExternalSymbolAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeJumpTableAddress - Give the size of the emission of this jump
-/// table address
-///
-static unsigned sizeJumpTableAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-static unsigned sizeConstant(unsigned Size) {
- return Size;
-}
-
-static unsigned sizeRegModRMByte(){
- return 1;
-}
-
-static unsigned sizeSIBByte(){
- return 1;
-}
-
-static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
- unsigned FinalSize = 0;
- // If this is a simple integer displacement that doesn't require a relocation.
- if (!RelocOp) {
- FinalSize += sizeConstant(4);
- return FinalSize;
- }
-
- // Otherwise, this is something that requires a relocation.
- if (RelocOp->isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (RelocOp->isCPI()) {
- FinalSize += sizeConstPoolAddress(false);
- } else if (RelocOp->isJTI()) {
- FinalSize += sizeJumpTableAddress(false);
- } else {
- llvm_unreachable("Unknown value to relocate!");
- }
- return FinalSize;
-}
-
-static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
- bool IsPIC, bool Is64BitMode) {
- const MachineOperand &Op3 = MI.getOperand(Op+3);
- int DispVal = 0;
- const MachineOperand *DispForReloc = 0;
- unsigned FinalSize = 0;
-
- // Figure out what sort of displacement we have to handle here.
- if (Op3.isGlobal()) {
- DispForReloc = &Op3;
- } else if (Op3.isCPI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else if (Op3.isJTI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else {
- DispVal = 1;
- }
-
- const MachineOperand &Base = MI.getOperand(Op);
- const MachineOperand &IndexReg = MI.getOperand(Op+2);
-
- unsigned BaseReg = Base.getReg();
-
- // Is a SIB byte needed?
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
- IndexReg.getReg() == 0 &&
- (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
- if (BaseReg == 0) { // Just a displacement?
- // Emit special case [disp32] encoding
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- } else {
- unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
- if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
- // Emit simple indirect register encoding... [EAX] f.e.
- ++FinalSize;
- // Be pessimistic and assume it's a disp32, not a disp8
- } else {
- // Emit the most general non-SIB encoding: [REG+disp32]
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
-
- } else { // We need a SIB byte, so start by outputting the ModR/M byte first
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
- bool ForceDisp32 = false;
- if (BaseReg == 0 || DispForReloc) {
- // Emit the normal disp32 encoding.
- ++FinalSize;
- ForceDisp32 = true;
- } else {
- ++FinalSize;
- }
-
- FinalSize += sizeSIBByte();
-
- // Do we need to output a displacement?
- if (DispVal != 0 || ForceDisp32) {
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
- return FinalSize;
-}
-
-
-static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
- const TargetInstrDesc *Desc,
- bool IsPIC, bool Is64BitMode) {
-
- unsigned Opcode = Desc->Opcode;
- unsigned FinalSize = 0;
-
- // Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK) ++FinalSize;
-
- // Emit segment override opcode prefix as needed.
- switch (Desc->TSFlags & X86II::SegOvrMask) {
- case X86II::FS:
- case X86II::GS:
- ++FinalSize;
- break;
- default: llvm_unreachable("Invalid segment!");
- case 0: break; // No segment override!
- }
-
- // Emit the repeat opcode prefix as needed.
- if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;
-
- // Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize) ++FinalSize;
-
- // Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize) ++FinalSize;
-
- bool Need0FPrefix = false;
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TB: // Two-byte opcode prefix
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- Need0FPrefix = true;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::REP: break; // already handled.
- case X86II::XS: // F3 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::XD: // F2 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
- case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
- ++FinalSize;
- break; // Two-byte opcode prefix
- default: llvm_unreachable("Invalid prefix!");
- case 0: break; // No prefix!
- }
-
- if (Is64BitMode) {
- // REX prefix
- unsigned REX = X86InstrInfo::determineREX(MI);
- if (REX)
- ++FinalSize;
- }
-
- // 0x0F escape code must be emitted just before the opcode.
- if (Need0FPrefix)
- ++FinalSize;
-
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8: // 0F 38
- ++FinalSize;
- break;
- case X86II::TA: // 0F 3A
- ++FinalSize;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- break;
- }
-
- // If this is a two-address instruction, skip one of the register operands.
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
- CurOp++;
- else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
-
- switch (Desc->TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
- case X86II::Pseudo:
- // Remember the current PC offset, this is the PIC relocation
- // base address.
- switch (Opcode) {
- default:
- break;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
- FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- break;
- }
- case TargetOpcode::DBG_LABEL:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::DBG_VALUE:
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
- break;
- case X86::MOVPC32r: {
- // This emits the "call" portion of this pseudo instruction.
- ++FinalSize;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- break;
- }
- }
- CurOp = NumOps;
- break;
- case X86II::RawFrm:
- ++FinalSize;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- if (MO.isMBB()) {
- FinalSize += sizePCRelativeBlockAddress();
- } else if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (MO.isSymbol()) {
- FinalSize += sizeExternalSymbolAddress(false);
- } else if (MO.isImm()) {
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- } else {
- llvm_unreachable("Unknown RawFrm operand!");
- }
- }
- break;
-
- case X86II::AddRegFrm:
- ++FinalSize;
- ++CurOp;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRMDestReg: {
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
- case X86II::MRMDestMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRMSrcReg:
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
-
- case X86II::MRMSrcMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- ++FinalSize;
- if (Desc->getOpcode() == X86::LFENCE ||
- Desc->getOpcode() == X86::MFENCE) {
- // Special handling of lfence and mfence;
- FinalSize += sizeRegModRMByte();
- } else if (Desc->getOpcode() == X86::MONITOR ||
- Desc->getOpcode() == X86::MWAIT) {
- // Special handling of monitor and mwait.
- FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
- } else {
- ++CurOp;
- FinalSize += sizeRegModRMByte();
- }
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri32)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
-
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64mi32)
- dword = true;
- if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRM_C1:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- FinalSize += 2;
- break;
- }
-
- case X86II::MRMInitReg:
- ++FinalSize;
- // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
- FinalSize += sizeRegModRMByte();
- ++CurOp;
- break;
- }
-
- if (!Desc->isVariadic() && CurOp != NumOps) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Cannot determine size: " << MI;
- report_fatal_error(Msg.str());
- }
-
-
- return FinalSize;
-}
-
-
-unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const TargetInstrDesc &Desc = MI->getDesc();
- bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
- unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
- if (Desc.getOpcode() == X86::MOVPC32r)
- Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
- return Size;
-}
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
@@ -3573,7 +3025,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
static const unsigned ReplaceableInstrs[][3] = {
- //PackedInt PackedSingle PackedDouble
+ //PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
@@ -3589,6 +3041,22 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
@@ -3627,7 +3095,7 @@ namespace {
/// global base register for x86-32.
struct CGBR : public MachineFunctionPass {
static char ID;
- CGBR() : MachineFunctionPass(&ID) {}
+ CGBR() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index ad0217adb475..f33620641e88 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -311,6 +311,12 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 43,
FormMask = 63,
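// Editorial note (not part of the imported patch): RawFrmImm16 is used later
// in this same patch by the far jump/call definitions in X86InstrInfo.td, e.g.
//   def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
//                          (ins i16imm:$off, i16imm:$seg),
//                          "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;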
@@ -439,27 +445,27 @@ namespace X86II {
//===------------------------------------------------------------------===//
// VEX - The opcode prefix used by AVX instructions
- VEX = 1ULL << 32,
+ VEX = 1U << 0,
// VEX_W - Has opcode-specific functionality, but is used in the same
// way as REX_W is for regular SSE instructions.
- VEX_W = 1ULL << 33,
+ VEX_W = 1U << 1,
// VEX_4V - Used to specify an additional AVX/SSE register. Several 2-address
// instructions in SSE are represented as 3-address ones in AVX
// and the additional register is encoded in the VEX_VVVV prefix.
- VEX_4V = 1ULL << 34,
+ VEX_4V = 1U << 2,
// VEX_I8IMM - Specifies that the last register used in an AVX instruction,
// must be encoded in the i8 immediate field. This usually happens in
// instructions with 4 operands.
- VEX_I8IMM = 1ULL << 35,
+ VEX_I8IMM = 1U << 3,
// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
// instruction uses 256-bit wide registers. This is usually auto-detected if
// a VR256 register is used, but some AVX instructions also have this field
// marked when using f256 memory references.
- VEX_L = 1ULL << 36
+ VEX_L = 1U << 4
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -522,11 +528,12 @@ namespace X86II {
case X86II::AddRegFrm:
case X86II::MRMDestReg:
case X86II::MRMSrcReg:
+ case X86II::RawFrmImm16:
return -1;
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem: {
- bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
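// Editorial note (not part of the imported patch): with the VEX_* enumerators
// redefined above as small values (bits 0-4), they are assumed to be stored in
// the upper half of the 64-bit TSFlags word, which is why tests now shift the
// flags right by 32 before masking, as in the hunk above:
//   bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;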
@@ -610,12 +617,6 @@ public:
///
virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
@@ -826,16 +827,11 @@ public:
if (!MO.isReg()) return false;
return isX86_64ExtendedReg(MO.getReg());
}
- static unsigned determineREX(const MachineInstr &MI);
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
static bool isX86_64ExtendedReg(unsigned RegNo);
- /// GetInstSize - Returns the size of the specified MachineInstr.
- ///
- virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 1efef5a80b1b..09b7721a621d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
@@ -222,7 +237,7 @@ def i16mem : X86MemOperand<"printi16mem">;
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
-//def i256mem : X86MemOperand<"printi256mem">;
+def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
@@ -333,15 +348,21 @@ def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
-def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+// FIXME: temporary hack to let codegen assert or generate poor code in case
+// no AVX version of the desired instructions is present; this is better for
+// incremental development (without fallbacks it's easier to spot what's missing)
+def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
@@ -393,9 +414,7 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{
- return N->getSExtValue() == (int8_t)N->getSExtValue();
-}]>;
+def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
@@ -559,9 +578,10 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
// The main point of having separate instructions is the extra unmodelled effects
// (compared to ordinary calls) like stack pointer changes.
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
- "# dynamic stack allocation",
- [(X86MingwAlloca)]>;
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86MingwAlloca)]>;
}
// Nop
@@ -574,10 +594,14 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+let Uses = [EFLAGS] in {
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+}
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
@@ -650,16 +674,16 @@ let Uses = [ECX], isBranch = 1, isTerminator = 1 in
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>;
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>;
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
- def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "ljmp{w}\t$seg, $off", []>, OpSize;
- def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "ljmp{l}\t$seg, $off", []>;
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize;
@@ -670,9 +694,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// Loop instructions
-def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -695,12 +719,12 @@ let isCall = 1 in
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
"call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
- def FARCALL16i : Iseg16<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "lcall{w}\t$seg, $off", []>, OpSize;
- def FARCALL32i : Iseg32<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "lcall{l}\t$seg, $off", []>;
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize;
@@ -721,7 +745,8 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
// Tail call stuff.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
@@ -756,7 +781,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
//
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>;
+ (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
@@ -934,7 +959,7 @@ def SYSRET : I<0x07, RawFrm,
def SYSENTER : I<0x34, RawFrm,
(outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB;
+ (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
@@ -1025,17 +1050,23 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|%al, $src}", []>;
+ "mov{b}\t{$src, %al|%al, $src}", []>,
+ Requires<[In32BitMode]>;
def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|%eax, $src}", []>;
+ "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ Requires<[In32BitMode]>;
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, %al}", []>;
+ "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ Requires<[In32BitMode]>;
def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ Requires<[In32BitMode]>;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, %eax}", []>;
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ Requires<[In32BitMode]>;
// Moves to and from segment registers
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
@@ -1087,6 +1118,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
[(store GR32:$src, addr:$dst)]>;
/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
@@ -1101,10 +1133,12 @@ let mayStore = 1 in
def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[]>;
+}
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
@@ -1118,6 +1152,7 @@ let mayLoad = 1,
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+}
// Moves to and from debug registers
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
@@ -1137,7 +1172,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
// Extra precision multiplication
-// AL is really implied by AX, by the registers in Defs must match the
+// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
@@ -3895,6 +3930,20 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
// Atomic support
//
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in {
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+}
+
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
let Constraints = "$val = $dst" in {
@@ -4928,6 +4977,12 @@ include "X86Instr64bit.td"
include "X86InstrFragmentsSIMD.td"
//===----------------------------------------------------------------------===//
+// FMA - Fused Multiply-Add support (requires FMA)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFMA.td"
+
+//===----------------------------------------------------------------------===//
// XMM Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 6cf7ac83620e..11d4179534dc 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -164,7 +164,7 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
"movdq2q\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ebe161b46bdc..f5466f83f519 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -142,7 +142,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
!strconcat(SSEVer, !strconcat("_",
!strconcat(OpcodeStr, FPSizeStr))))
RC:$src1, RC:$src2))], d>;
@@ -150,7 +150,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
!strconcat(SSEVer, !strconcat("_",
!strconcat(OpcodeStr, FPSizeStr))))
RC:$src1, (mem_frag addr:$src2)))], d>;
@@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
let isAsmParserOnly = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+ [(store FR32:$src, addr:$dst)]>, XS, VEX;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+ [(store FR64:$src, addr:$dst)]>, XD, VEX;
}
// Extract and store.
@@ -340,6 +340,15 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
}
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -516,6 +525,14 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
}
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ []>;
+}
+
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d> {
@@ -526,35 +543,58 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ X86MemOperand x86memop, string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- asm, []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src), asm, []>;
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
let isAsmParserOnly = 1 in {
-defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
- VEX_4V;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
- VEX_4V;
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX, VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing an rxx
+// register, but the same isn't true when only memory operands are used,
+// so provide explicit "l" and "q" assembly forms to address this
+// where appropriate.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+ VEX_4V;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
+ VEX_4V, VEX_W;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+ VEX_4V;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
+ VEX_4V;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
+ VEX_4V, VEX_W;
}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
"cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
"cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -570,10 +610,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))]>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -588,35 +630,79 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm> {
+ PatFrag ld_frag, string asm, bit Is2Addr = 1> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
let isAsmParserOnly = 1 in {
defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
- VEX;
+ f32mem, load, "cvtss2si">, XS, VEX;
+ defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX;
+ f128mem, load, "cvtsd2si">, XD, VEX;
+ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+ // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+ // prefixes. Get rid of this hack or rename the intrinsics; there are several
+ // instructions that only match the intrinsic form, so why create duplicates
+ // just to let the assembler recognize them?
+ defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+ defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
}
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+ f32mem, load, "cvtss2si">, XS;
+defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ f32mem, load, "cvtss2si{q}">, XS, REX_W;
defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
+ f128mem, load, "cvtsd2si">, XD;
+defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
+ REX_W;
+
+let isAsmParserOnly = 1 in {
+ defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+ defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+ defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
+}
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ "cvtsi2ss">, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}">, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
+ "cvtsi2sd">, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd">, XD, REX_W;
}
// Instructions below don't have an AVX form.
@@ -645,35 +731,48 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
-let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
- int_x86_sse_cvttss2si, f32mem, load,
- "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
-defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
- int_x86_sse2_cvttsd2si, f128mem, load,
- "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
+let isAsmParserOnly = 1 in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttss2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttss2si">, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
- XS;
+ f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
- XD;
+ f128mem, load, "cvttss2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttss2si{q}">, XD, REX_W;
let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+ "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
                       SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
@@ -701,13 +800,11 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load,
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
- XS, VEX_4V;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load,
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
@@ -806,6 +903,7 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+
// Convert packed single/double fp to doubleword
let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -964,11 +1062,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
@@ -1029,6 +1127,39 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsic matching to use patterns like the
+// ones below whenever possible, to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
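// A rough sketch of the migration the FIXME above suggests for the plain SSE
// (non-AVX) intrinsics: match the intrinsic directly onto the ordinary
// instruction with a Pat<>, instead of keeping a separate Int_* def. CVTPS2PDrr
// is assumed here to be the register form alongside CVTPS2PDrm:
//   def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
//             (CVTPS2PDrr VR128:$src)>;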
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
@@ -1193,16 +1324,14 @@ let isAsmParserOnly = 1 in {
"cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
- let Pattern = []<dag> in {
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -1232,24 +1361,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
Domain d, bit IsConvertibleToThreeAddress = 0> {
- def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
- [(set VR128:$dst, (vt (shufp:$src3
- VR128:$src1, (mem_frag addr:$src2))))], d>;
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (shufp:$src3
+ RC:$src1, (mem_frag addr:$src2))))], d>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
- [(set VR128:$dst,
- (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst,
+ (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}
let isAsmParserOnly = 1 in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+ defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -1351,12 +1486,23 @@ let isAsmParserOnly = 1 in {
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
"movmskpd", SSEPackedDouble>, OpSize,
VEX;
- // FIXME: merge with multiclass above when the intrinsics come.
- def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
+ VEX;
}
//===----------------------------------------------------------------------===//
@@ -1536,6 +1682,9 @@ let isCommutable = 0 in
///
/// These three forms can each be reg+reg or reg+mem.
///
+
+/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
+/// classes below.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -1565,7 +1714,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- bit Is2Addr = 1> {
+ bit Is2Addr = 1> {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
@@ -1573,37 +1722,57 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- bit Is2Addr = 1> {
+ bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
SSEPackedSingle, Is2Addr>, TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
SSEPackedDouble, Is2Addr>, TB, OpSize;
}
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
// Binary Arithmetic instructions
let isAsmParserOnly = 1 in {
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
}
@@ -1668,20 +1837,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, XS, Requires<[HasAVX, OptForSize]>;
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -1715,6 +1884,16 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
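// As a usage sketch: instantiating the multiclass above as
//   sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>
// inside "defm VSQRT : ..." (further below) yields defs named VSQRTPSYr_Int
// and VSQRTPSYm_Int, whose asm string expands to
// "vsqrtps\t{$src, $dst|$dst, $src}".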
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -1738,21 +1917,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int> {
- def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
- def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -1787,29 +1964,48 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
VEX_4V;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
int_x86_sse_rsqrt_ss>, VEX_4V;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
- defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
VEX_4V;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
// Square root.
@@ -1898,6 +2094,13 @@ let isAsmParserOnly = 1 in {
}
}
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
@@ -1961,11 +2164,14 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
@@ -1977,6 +2183,26 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
+// The same as above, but for AVX. The 128-bit versions are the same, just
+// re-encoded. There is no 256-bit PI version.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
@@ -2003,35 +2229,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//
+
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isAsmParserOnly = 1 in {
- let neverHasSideEffects = 1 in
- def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+ let neverHasSideEffects = 1 in {
+ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ }
+ def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+ def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
let canFoldAsLoad = 1, mayLoad = 1 in {
- def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
- VEX;
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
- XS, VEX, Requires<[HasAVX]>;
+ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ }
}
let mayStore = 1 in {
- def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
- def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
- XS, VEX, Requires<[HasAVX]>;
+ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ let Predicates = [HasAVX] in {
+ def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ }
}
}
@@ -2084,6 +2322,10 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
} // ExeDomain = SSEPackedInt
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
@@ -2376,6 +2618,25 @@ let ExeDomain = SSEPackedInt in {
}
} // Constraints = "$src1 = $dst"
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+ // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
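// Note (an assumption, not stated in the patch): the intrinsics without the
// _bs suffix appear to take the shift amount in bits, so BYTE_imm presumably
// converts it to the byte count that VPSLLDQri/VPSRLDQri expect, while the
// _bs ("byte shift") variants pass the immediate through unchanged.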
+
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
@@ -2662,11 +2923,16 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
imm:$src2))]>;
// Insert
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm PINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, OpSize, VEX_4V;
+}
let Constraints = "$src1 = $dst" in
- defm VPINSRW : sse2_pinsrw, TB, OpSize;
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -2676,10 +2942,13 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in
-def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+let isAsmParserOnly = 1 in {
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2939,18 +3208,20 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
// Instructions to match in the assembler
let isAsmParserOnly = 1 in {
-// This instructions is in fact an alias to movd with 64 bit dst
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with GR64 destination, but encode as a "movq"
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2970,19 +3241,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
-
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
@@ -3027,13 +3293,13 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// Convert Packed DW Integers to Packed Double FP
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3041,6 +3307,17 @@ def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE3 - Move Instructions
//===---------------------------------------------------------------------===//
@@ -3057,9 +3334,20 @@ def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memopv4f32 addr:$src), (undef)))]>;
}
+multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
+ string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
-defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
-defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
@@ -3076,15 +3364,31 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(undef))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
@@ -3125,35 +3429,39 @@ let AddedComplexity = 20 in
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
-multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (Int VR128:$src1,
- VR128:$src2))]>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
def rm : I<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (Int VR128:$src1,
- (memop addr:$src2)))]>;
-
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
- VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
- VEX_4V;
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, XD, VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, OpSize, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, XD, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
- defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
- defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem>, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem>, TB, OpSize;
}
//===---------------------------------------------------------------------===//
@@ -3161,61 +3469,72 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
//===---------------------------------------------------------------------===//
// Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
- def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
- def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
- def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
- def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
- def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
- def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
- def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ int_x86_avx_hsub_pd_256, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
- def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
- def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
- def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
- def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
- def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
- def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
- def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd>;
}
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int_mm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, Intrinsic IntId64> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -3224,7 +3543,11 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
+}
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3240,26 +3563,28 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
int_x86_ssse3_pabs_d_128>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>,
+ SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8,
+ int_x86_ssse3_pabs_b>;
+
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>,
+ SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16,
+ int_x86_ssse3_pabs_w>;
+
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>,
+ SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32,
+ int_x86_ssse3_pabs_d>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -3267,26 +3592,9 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128,
+ PatFrag mem_frag128, Intrinsic IntId128,
bit Is2Addr = 1> {
let isCommutable = 1 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -3303,88 +3611,102 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, Intrinsic IntId64> {
+ let isCommutable = 1 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
- int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
- int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
- int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
- int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
- int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
- int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
- int_x86_ssse3_psign_d_128>;
-}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
- int_x86_ssse3_pmul_hr_sw_128>;
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128>,
+ SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16,
+ int_x86_ssse3_phadd_w>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128>,
+ SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32,
+ int_x86_ssse3_phadd_d>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128>,
+ SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16,
+ int_x86_ssse3_phadd_sw>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128>,
+ SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16,
+ int_x86_ssse3_phsub_w>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128>,
+ SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32,
+ int_x86_ssse3_phsub_d>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128>,
+ SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16,
+ int_x86_ssse3_phsub_sw>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128>,
+ SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8,
+ int_x86_ssse3_pmadd_ub_sw>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8,
+ int_x86_ssse3_pshuf_b_128>,
+ SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8,
+ int_x86_ssse3_pshuf_b>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ int_x86_ssse3_psign_b_128>,
+ SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8,
+ int_x86_ssse3_psign_b>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ int_x86_ssse3_psign_w_128>,
+ SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16,
+ int_x86_ssse3_psign_w>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ int_x86_ssse3_psign_d_128>,
+ SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32,
+ int_x86_ssse3_psign_d>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128>,
+ SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16,
+ int_x86_ssse3_pmul_hr_sw>;
}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
@@ -3396,22 +3718,16 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass sse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palign_mm<string asm> {
def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+}
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -3429,9 +3745,10 @@ multiclass sse3_palign<string asm, bit Is2Addr = 1> {
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PALIGN : sse3_palign<"palignr">;
+ defm PALIGN : ssse3_palign<"palignr">,
+ ssse3_palign_mm<"palignr">;
let AddedComplexity = 5 in {
@@ -3732,31 +4049,62 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
(Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
-def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
-def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
-def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
@@ -3923,8 +4271,12 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -4007,8 +4359,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+}
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -4131,80 +4488,84 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
+ (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasAVX]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
TA, OpSize,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
}
-multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr> {
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+ RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
def PSm : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TA, OpSize, Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
@@ -4261,8 +4622,8 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
}
-multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
// Intrinsic operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -4295,24 +4656,90 @@ multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
// FP round - roundss, roundps, roundsd, roundpd
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Intrinsic form
- defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
- VEX;
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd,
- 0>, VEX_4V;
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V;
+
// Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
- defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+ defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+ VEX;
+ defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
}
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction. We lower to this node in X86ISelLowering, primarily from
+// the Intel intrinsic that corresponds to it.
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize;
+}
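For reference, the flag behavior the X86ptest patterns above model is: ZF is set when src1 AND src2 is all zeros, and CF is set when (NOT src1) AND src2 is all zeros. A minimal C++ sketch of that computation (the struct and helper names are illustrative, not from the LLVM tree):

#include <cstdint>

// PTEST on a 128-bit value viewed as two 64-bit halves.
// ZF <- ((src1 & src2) == 0), CF <- ((~src1 & src2) == 0).
struct PTestFlags { bool ZF, CF; };

static PTestFlags ptest128(const uint64_t src1[2], const uint64_t src2[2]) {
  uint64_t andBits  = (src1[0] & src2[0]) | (src1[1] & src2[1]);
  uint64_t andnBits = (~src1[0] & src2[0]) | (~src1[1] & src2[1]);
  return { andBits == 0, andnBits == 0 };
}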
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+}
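VTESTPS/VTESTPD differ from PTEST only in that they examine the sign bit of each packed element. A minimal C++ sketch of the 128-bit single-precision form (names are illustrative):

#include <cstdint>

// VTESTPS: ZF <- sign bits of (src1 & src2) all clear,
//          CF <- sign bits of (~src1 & src2) all clear.
struct TestFlags { bool ZF, CF; };

static TestFlags vtestps128(const uint32_t src1[4], const uint32_t src2[4]) {
  uint32_t andSigns = 0, andnSigns = 0;
  for (int i = 0; i < 4; ++i) {
    andSigns  |= (src1[i] & src2[i]) & 0x80000000u;
    andnSigns |= (~src1[i] & src2[i]) & 0x80000000u;
  }
  return { andSigns == 0, andnSigns == 0 };
}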
+
+//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
@@ -4431,79 +4858,104 @@ let Constraints = "$src1 = $dst" in
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
OpSize;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
OpSize;
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
}
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem>;
}
- defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
- defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
- def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
-
- def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- }
-}
-
-defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvpd>;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_pd_256>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_ps_256>;
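The variable blends defined above select between the two sources element by element, driven by the sign bit of the corresponding mask element. A minimal C++ sketch of the single-precision case (names are illustrative):

#include <cstdint>

// BLENDVPS-style selection: mask sign bit set -> take src2, else src1.
static void blendvps(const float src1[4], const float src2[4],
                     const uint32_t mask[4], float dst[4]) {
  for (int i = 0; i < 4; ++i)
    dst[i] = (mask[i] & 0x80000000u) ? src2[i] : src1[i];
}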
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -4529,30 +4981,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-// ptest instruction we'll lower to this in X86ISelLowering primarily from
-// the intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
-def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize, VEX;
-def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize, VEX;
-}
-
-let Defs = [EFLAGS] in {
-def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize;
-}
-
let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -4603,17 +5031,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
//===----------------------------------------------------------------------===//
// Packed Compare Implicit Length Strings, Return Mask
-let Defs = [EFLAGS], usesCustomInserter = 1 in {
- def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "#PCMPISTRM128rr PSEUDO!",
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
- imm:$src3))]>, OpSize;
- def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "#PCMPISTRM128rm PSEUDO!",
+ imm:$src3))]>;
+ def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
- VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
+ VR128:$src1, (load addr:$src2), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
@@ -4636,20 +5067,20 @@ let Defs = [XMM0, EFLAGS] in {
}
// Packed Compare Explicit Length Strings, Return Mask
-let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
-
- def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "#PCMPESTRM128rm PSEUDO!",
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
- OpSize;
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],
@@ -4941,3 +5372,579 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
+
+//===----------------------------------------------------------------------===//
+// CLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// Only the AVX versions of the CLMUL instructions are described here.
+
+// Carry-less Multiplication instructions
+let isAsmParserOnly = 1 in {
+def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+// Assembler Only
+multiclass avx_vpclmul<string asm> {
+ def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+
+ def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">;
+defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
+defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
+defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
+
+} // isAsmParserOnly
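Carry-less multiplication is polynomial multiplication over GF(2): partial products are combined with XOR instead of addition, producing a 128-bit result from two 64-bit quadwords. A minimal C++ sketch of that operation (it illustrates the arithmetic only, not the instruction encoding above):

#include <cstdint>

// Carry-less multiply of two 64-bit values; hi:lo is the 128-bit product.
static void clmul64(uint64_t a, uint64_t b, uint64_t &hi, uint64_t &lo) {
  hi = lo = 0;
  for (unsigned i = 0; i < 64; ++i) {
    if (b & (1ULL << i)) {
      lo ^= a << i;
      if (i != 0)
        hi ^= a >> (64 - i);
    }
  }
}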
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+
+// Load from memory and broadcast to all elements of the destination operand
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
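The broadcast forms above load a single element (or, for vbroadcastf128, a 128-bit block) and replicate it across the destination. A minimal C++ sketch of the scalar case (names are illustrative):

#include <cstddef>

// vbroadcastss-style semantics: replicate one scalar into every lane
// (4 lanes for the 128-bit form, 8 for the 256-bit form).
static void broadcastss(const float *src, float *dst, size_t lanes) {
  for (size_t i = 0; i < lanes; ++i)
    dst[i] = *src;
}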
+
+// Insert packed floating-point values
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Extract packed floating-point values
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+
+// Conditional SIMD Packed Loads and Stores
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
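The conditional moves above use the sign bit of each mask element to decide, lane by lane, whether to load (otherwise zero the lane) or store (otherwise leave memory untouched). A minimal C++ sketch of the 128-bit single-precision forms (names are illustrative):

#include <cstdint>

// vmaskmovps load: masked-off lanes are zeroed.
static void maskload_ps(const float *mem, const uint32_t mask[4], float dst[4]) {
  for (int i = 0; i < 4; ++i)
    dst[i] = (mask[i] & 0x80000000u) ? mem[i] : 0.0f;
}

// vmaskmovps store: masked-off lanes are not written.
static void maskstore_ps(float *mem, const uint32_t mask[4], const float src[4]) {
  for (int i = 0; i < 4; ++i)
    if (mask[i] & 0x80000000u)
      mem[i] = src[i];
}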
+
+// Permute Floating-Point Values
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
+}
+
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
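The permutes above rearrange elements within each 128-bit lane; the immediate form takes a 2-bit selector per destination element, and the variable form reads the selectors from the low bits of the corresponding control-vector element instead. A minimal C++ sketch of the 128-bit immediate form (names are illustrative):

#include <cstdint>

// vpermilps $imm semantics: destination lane i picks source lane
// (imm >> 2*i) & 3.
static void vpermilps128(const float src[4], uint8_t imm, float dst[4]) {
  for (int i = 0; i < 4; ++i)
    dst[i] = src[(imm >> (2 * i)) & 3];
}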
+
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Zero All YMM registers
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
+
+// Zero Upper bits of YMM registers
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
+
+} // isAsmParserOnly
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles are going to be translated into target-specific nodes and
+// directly matched by the patterns below (which can be changed along the way).
+// The AVX versions of some, but not all, of them are described here; more
+// should come in the near future.
+
+// Shuffle with PSHUFD instruction folding loads. The first two patterns match
+// SSE2 loads, which are always promoted to v2i64. The last one should match
+// the SSE1 case, where the only legal load is v4f32, but PSHUFD itself is not
+// available without SSE2, so how did this ever work? The pattern will remain
+// here until we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+ (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+ (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+ (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+ (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+ (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of an X86Movss there should be an X86Movlps here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+ (bc_v4i32 (v2i64 (load addr:$src2)))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
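These patterns model the register form of MOVSD: the low 64-bit element is taken from the second source and the upper element is carried over from the first, which is why the expansions extract the sub_sd subregister of $src2. A minimal C++ sketch (names are illustrative):

// movsd xmm1, xmm2 (register form): low element from src2, high from src1.
static void movsd_rr(const double src1[2], const double src2[2], double dst[2]) {
  dst[0] = src2[0];
  dst[1] = src1[1];
}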
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+ (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+ (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
+def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
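PALIGNR concatenates its two 16-byte sources into a 32-byte value, shifts it right by imm bytes, and keeps the low 16 bytes; note that the patterns above swap the operand order when mapping the X86PAlign node onto the instruction. A minimal C++ sketch of the byte-level semantics (names are illustrative):

#include <cstdint>
#include <cstring>

// palignr: dst = low 16 bytes of ((hi:lo) >> (imm * 8)); bytes shifted in
// from beyond the concatenation are zero.
static void palignr128(const uint8_t hi[16], const uint8_t lo[16],
                       unsigned imm, uint8_t dst[16]) {
  uint8_t concat[32];
  std::memcpy(concat, lo, 16);        // bytes 0..15
  std::memcpy(concat + 16, hi, 16);   // bytes 16..31
  for (unsigned i = 0; i < 16; ++i)
    dst[i] = (imm + i < 32) ? concat[imm + i] : 0;
}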
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 2b8720bac343..36badb403e81 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -103,6 +103,9 @@ getNonexecutableStackSection(MCContext &Ctx) const {
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64)
+ GlobalPrefix = "";
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
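On 64-bit Windows, C symbols are not decorated with a leading underscore, so the COFF AsmInfo above clears GlobalPrefix for x86-64 while 32-bit COFF keeps the "_" prefix. A minimal C++ sketch of the visible effect (illustrative only):

#include <string>

// Symbol decoration: 32-bit COFF emits `_foo`, 64-bit COFF emits `foo`.
static std::string decorateSymbol(const std::string &name, bool is64Bit) {
  return (is64Bit ? "" : "_") + name;
}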
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 23b0666f5f30..9564fe0b92d4 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -365,7 +365,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
const TargetInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
// VEX_R: opcode extension equivalent to REX.R in
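The `(TSFlags >> 32)` changes throughout this file reflect the VEX-related bits living in the upper half of the 64-bit TSFlags word, with the enum values defined relative to bit 0 of that upper half; masking the unshifted value would test the wrong bits. A minimal C++ sketch of the idiom (the enum layout here is a hypothetical stand-in, not the real X86II values):

#include <cstdint>

namespace demo {
// Hypothetical layout of the upper 32 bits of TSFlags.
enum { VEX = 1u << 0, VEX_W = 1u << 1, VEX_4V = 1u << 2,
       VEX_I8IMM = 1u << 3, VEX_L = 1u << 4 };
}

static bool hasVEX_4V(uint64_t tsFlags) {
  return ((tsFlags >> 32) & demo::VEX_4V) != 0;  // shift first, then mask
}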
@@ -429,10 +429,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
- if (TSFlags & X86II::VEX_W)
+ if ((TSFlags >> 32) & X86II::VEX_W)
VEX_W = 1;
- if (TSFlags & X86II::VEX_L)
+ if ((TSFlags >> 32) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
@@ -469,33 +469,39 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned NumOps = MI.getNumOperands();
unsigned CurOp = 0;
+ bool IsDestMem = false;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMDestMem:
+ IsDestMem = true;
+ // The important info for the VEX prefix is never beyond the address
+ // registers. Don't check beyond that.
+ NumOps = CurOp = X86::AddrNumOperands;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem:
- NumOps = CurOp = X86::AddrNumOperands;
case X86II::MRMSrcMem:
case X86II::MRMSrcReg:
if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
-
- // CurOp and NumOps are equal when VEX_R represents a register used
- // to index a memory destination (which is the last operand)
- CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+ CurOp++;
if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
CurOp++;
}
+    // To check only the operands that come before the memory address
+    // operands, start the search from the beginning.
+ if (IsDestMem)
+ CurOp = 0;
+
    // If the last register should be encoded in the immediate field,
    // do not use any VEX prefix bits for that register; ignore it here.
- if (TSFlags & X86II::VEX_I8IMM)
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM)
NumOps--;
for (; CurOp != NumOps; ++CurOp) {
@@ -508,7 +514,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
}
break;
- default: // MRMDestReg, MRM0r-MRM7r
+ default: // MRMDestReg, MRM0r-MRM7r, RawFrm
+ if (!MI.getNumOperands())
+ break;
+
if (MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0;
@@ -524,7 +533,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_R = 0x0;
}
break;
- assert(0 && "Not implemented!");
}
// Emit segment override opcode prefix as needed.
@@ -793,9 +801,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX)
+ if ((TSFlags >> 32) & X86II::VEX)
HasVEXPrefix = true;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
// Determine where the memory operand starts, if present.
@@ -819,6 +827,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
+
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
@@ -833,10 +849,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestMem:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
EmitMemModRMByte(MI, CurOp,
- GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)),
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
TSFlags, CurByte, OS, Fixups);
- CurOp += X86::AddrNumOperands + 1;
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMSrcReg:
@@ -934,7 +955,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (CurOp != NumOps) {
    // The last source register of a 4-operand instruction in AVX is encoded
    // in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
- if (TSFlags & X86II::VEX_I8IMM) {
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
bool IsExtReg =
X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
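For the VEX_I8IMM case referenced above, the extra register source of a 4-operand AVX instruction (e.g. vblendvps) travels in bits [7:4] of the trailing immediate byte, with bits [3:0] ignored. A minimal C++ sketch of that packing (the helper name is illustrative):

#include <cstdint>

// Place a register encoding (0..15) into the high nibble of the imm8.
static uint8_t encodeRegInImm8(unsigned regEncoding) {
  return static_cast<uint8_t>((regEncoding & 0xF) << 4);
}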
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index e67fc06a6cd7..8c4620f92177 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,7 +16,6 @@
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MCAsmInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -29,21 +28,19 @@
#include "llvm/Type.h"
using namespace llvm;
-
-const X86Subtarget &X86MCInstLower::getSubtarget() const {
- return AsmPrinter.getSubtarget();
-}
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
- assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
- return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}
MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
- const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering();
- return static_cast<const X86TargetLowering*>(TLI)->
- getPICBaseSymbol(AsmPrinter.MF, Ctx);
+ return static_cast<const X86TargetLowering*>(TM.getTargetLowering())->
+ getPICBaseSymbol(&MF, Ctx);
}
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
@@ -56,7 +53,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (!MO.isGlobal()) {
assert(MO.isSymbol());
- Name += AsmPrinter.MAI->getGlobalPrefix();
+ Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
} else {
const GlobalValue *GV = MO.getGlobal();
@@ -91,7 +88,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -105,7 +102,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -121,7 +118,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -178,7 +175,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
Ctx);
- if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) {
+ if (MO.isJTI() && MAI.hasSetDirective()) {
// If .set directive is supported, use it to reduce the number of
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
@@ -255,7 +252,13 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
}
/// \brief Simplify things like MOV32rm to MOV32o32a.
-static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
unsigned AddrBase = IsStore;
unsigned RegOp = IsStore ? 0 : 5;
@@ -336,7 +339,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO,
- AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
}
@@ -377,12 +380,17 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
case X86::MMX_V_SETALLONES:
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
- case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
- case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -393,12 +401,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
// register inputs modeled as normal uses instead of implicit uses. As such,
// truncate off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32: {
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -456,15 +466,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar
case X86::MOV8mr_NOREX:
- case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
case X86::MOV8rm_NOREX:
- case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
- case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
- case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
- case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
- case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
- case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
- case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
@@ -505,46 +513,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
- raw_ostream &O) {
- // Only the target-dependent form of DBG_VALUE should get here.
- // Referencing the offset and metadata as NOps-2 and NOps-1 is
- // probably portable to other targets; frame pointer location is not.
- unsigned NOps = MI->getNumOperands();
- assert(NOps==7);
- O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext()).getDisplayName() << ":";
- O << V.getName();
- O << " <- ";
- // Frame address. Currently handles register +- offset only.
- O << '[';
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
- printOperand(MI, 0, O);
- else
- O << "undef";
- O << '+'; printOperand(MI, 3, O);
- O << ']';
- O << "+";
- printOperand(MI, NOps-2, O);
-}
-
-MachineLocation
-X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
-
- if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
- return Location;
-}
-
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- X86MCInstLower MCInstLowering(OutContext, Mang, *this);
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
@@ -555,6 +526,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64:
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 9e5474fc81b3..539b09be6fd7 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -13,27 +13,30 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
+ class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
+ class MachineFunction;
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
+ class TargetMachine;
class X86AsmPrinter;
- class X86Subtarget;
/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
X86AsmPrinter &AsmPrinter;
-
- const X86Subtarget &getSubtarget() const;
public:
- X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter)
- : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {}
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5f31e00ebabd..fedd49ebb540 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -38,8 +38,15 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
@@ -193,6 +200,12 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::DR7:
return 7;
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -456,26 +469,29 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- bool requiresRealignment =
- RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
// FIXME: Currently we don't support stack realignment for functions with
// variable-sized allocas.
- // FIXME: Temporary disable the error - it seems to be too conservative.
+ // FIXME: It's more complicated than this...
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
- return (requiresRealignment && !MFI->hasVarSizedObjects());
+
+ // If we've requested that we force align the stack do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
}
-bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const {
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
if (Reg == FramePtr && hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
@@ -610,10 +626,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
+ int SPAdj, RegScavenger *RS) const{
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -660,7 +675,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
- return 0;
}
void
@@ -750,7 +764,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -901,6 +915,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
DebugLoc DL;
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
@@ -979,7 +1004,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
// Define the current CFA rule to use the provided offset.
if (StackSize) {
@@ -1007,7 +1032,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
// Define the current CFA to use the EBP/RBP register.
MachineLocation FPDst(FramePtr);
@@ -1047,7 +1072,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!HasFP && needsFrameMoves) {
// Mark callee-saved push instruction.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
// Define the current CFA rule to use the provided offset.
unsigned Ptr = StackSize ?
@@ -1062,7 +1087,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
DL = MBB.findDebugLoc(MBBI);
// Adjust stack pointer: ESP -= numbytes.
- if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe
+ // the stack and adjust the stack pointer in one go. The 64-bit version
+ // of __chkstk is only responsible for probing the stack. The 64-bit
+ // prologue is responsible for adjusting the stack pointer. Touching the
+ // stack at 4K increments is necessary to ensure that the guard pages used
+ // by the OS virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 &&
+ (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
// Check, whether EAX is livein for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
@@ -1073,16 +1108,16 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
Reg == X86::AH || Reg == X86::AL);
}
- // Function prologue calls _alloca to probe the stack when allocating more
- // than 4k bytes in one go. Touching the stack at 4K increments is necessary
- // to ensure that the guard pages used by the OS virtual memory manager are
- // allocated in correct sequence.
+
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
@@ -1093,8 +1128,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
@@ -1119,7 +1155,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if ((NumBytes || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -1172,6 +1208,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -1519,7 +1566,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
namespace {
struct MSAH : public MachineFunctionPass {
static char ID;
- MSAH() : MachineFunctionPass(&ID) {}
+ MSAH() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
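Aside on the ForceStackAlign hunks in X86RegisterInfo.cpp above: the prologue adjustment can be read as a small helper. This is a hedged restatement only; adjustMaxAlign is a name invented here, not anything in the patch.

// With calls present the requested alignment is raised to the ABI StackAlign;
// without calls it only needs to cover a single slot.
static unsigned adjustMaxAlign(bool HasCalls, unsigned MaxAlign,
                               unsigned StackAlign, unsigned SlotSize) {
  if (HasCalls)
    return StackAlign > MaxAlign ? StackAlign : MaxAlign;
  return MaxAlign < SlotSize ? SlotSize : MaxAlign;
}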
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index d852bcd2011c..527df05c58fc 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,18 +117,17 @@ public:
bool needsStackRealignment(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
- bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 9f0382e3fae9..95269b15760e 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -241,6 +241,10 @@ let Namespace = "X86" in {
def CR6 : Register<"cr6">;
def CR7 : Register<"cr7">;
def CR8 : Register<"cr8">;
+
+ // Pseudo index registers
+ def EIZ : Register<"eiz">;
+ def RIZ : Register<"riz">;
}
@@ -804,7 +808,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}];
}
-def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
+def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
@@ -829,4 +833,15 @@ def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
let CopyCost = -1; // Don't allow copying of status registers.
+
+ // EFLAGS is not allocatable.
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CCRClass::iterator
+ CCRClass::allocation_order_end(const MachineFunction &MF) const {
+ return allocation_order_begin(MF);
+ }
+ }];
}
diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h
new file mode 100644
index 000000000000..df040520bc8f
--- /dev/null
+++ b/lib/Target/X86/X86ShuffleDecode.h
@@ -0,0 +1,155 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+enum {
+ SM_SentinelZero = ~0U
+};
+
+static inline
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Default to copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+static void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+static void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
+
+static void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+static void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+static void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i);
+ ShuffleMask.push_back(i+NElts);
+ }
+}
+
+static void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
+static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
+
+static void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. NElts indicates the number of elements in the vector allowing it to
+/// handle different datatypes and vector widths.
+static void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest
+ ShuffleMask.push_back(i+NElts); // Reads from src
+ }
+}
+
+#endif
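A minimal usage sketch for the new header (a hypothetical test harness, not part of the patch; it assumes LLVM's SmallVector headers are on the include path, just as X86ShuffleDecode.h itself does): each pair of bits in a PSHUFD-style immediate selects one source element, so decoding 0x1B for four elements yields the reversed order.

#include "X86ShuffleDecode.h"
#include <cstdio>

int main() {
  SmallVector<unsigned, 8> Mask;
  DecodePSHUFMask(4, 0x1B, Mask);            // 4 elements, imm = 0b00011011
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    printf("%u ", Mask[i]);                  // prints: 3 2 1 0
  printf("\n");
  return 0;
}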
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 4a10be518f03..0d02e5ee472b 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -73,7 +73,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDefaultVisibility() &&
(isDecl || GV->isWeakForLinker()))
return X86II::MO_GOTPCREL;
- } else {
+ } else if (!isTargetWin64()) {
assert(isTargetELF() && "Unknown rip-relative target");
// Extra load is needed for all externally visible globals.
@@ -260,9 +260,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasAVX = ((ECX >> 28) & 0x1);
- HasAES = IsIntel && ((ECX >> 25) & 0x1);
+ HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasAVX = ((ECX >> 28) & 0x1);
+ HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
@@ -291,6 +292,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasSSE4A(false)
, HasAVX(false)
, HasAES(false)
+ , HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 486dbc4e2e90..0ee91abe21f4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -74,6 +74,9 @@ protected:
/// HasAES - Target has AES instructions
bool HasAES;
+ /// HasCLMUL - Target has carry-less multiplication
+ bool HasCLMUL;
+
/// HasFMA3 - Target has 3-operand fused multiply-add
bool HasFMA3;
@@ -149,6 +152,7 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasAVX() const { return HasAVX; }
bool hasAES() const { return HasAES; }
+ bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
@@ -182,6 +186,10 @@ public:
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
+ bool isTargetWin32() const {
+ return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ }
+
std::string getDataLayout() const {
const char *p;
if (is64Bit())
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index df00d3ffcc79..ce8636eb72b5 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -46,8 +46,15 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
- default:
+ case Triple::Darwin:
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ case Triple::MinGW32:
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+ default:
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
}
}
@@ -105,15 +112,21 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
-
+
// If no relocation model was picked, default as appropriate for the target.
if (getRelocationModel() == Reloc::Default) {
- if (!Subtarget.isTargetDarwin())
- setRelocationModel(Reloc::Static);
- else if (Subtarget.is64Bit())
+ // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+ // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+ // use static relocation model by default.
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
+ } else if (Subtarget.isTargetWin64())
setRelocationModel(Reloc::PIC_);
else
- setRelocationModel(Reloc::DynamicNoPIC);
+ setRelocationModel(Reloc::Static);
}
assert(getRelocationModel() != Reloc::Default &&
@@ -136,29 +149,27 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.isTargetDarwin() &&
is64Bit)
setRelocationModel(Reloc::PIC_);
-
+
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
} else if (Subtarget.isTargetCygMing()) {
Subtarget.setPICStyle(PICStyles::None);
} else if (Subtarget.isTargetDarwin()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else if (getRelocationModel() == Reloc::PIC_)
+ if (getRelocationModel() == Reloc::PIC_)
Subtarget.setPICStyle(PICStyles::StubPIC);
else {
assert(getRelocationModel() == Reloc::DynamicNoPIC);
Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
}
} else if (Subtarget.isTargetELF()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else
- Subtarget.setPICStyle(PICStyles::GOT);
+ Subtarget.setPICStyle(PICStyles::GOT);
}
-
+
// Finally, if we have "none" as our PIC style, force to static mode.
if (Subtarget.getPICStyle() == PICStyles::None)
setRelocationModel(Reloc::Static);
@@ -182,9 +193,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
- PM.add(createX87FPRegKillInserterPass());
-
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index 6656bdc10eae..8f06dd32662f 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -264,15 +264,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
-
+
// Check for mov mnemonic
- unsigned src, dst, srcSR, dstSR;
- if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
- O << "\tmov " << getRegisterName(dst) << ", ";
- O << getRegisterName(src);
- } else {
+ if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ else
printInstruction(MI, O);
- }
OutStreamer.EmitRawText(O.str());
}
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 1b8e7edfc7ca..38b35d7666c0 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -10,7 +10,7 @@ tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(XCoreGenCallingConv.inc -gen-callingconv)
tablegen(XCoreGenSubtarget.inc -gen-subtarget)
-add_llvm_target(XCore
+add_llvm_target(XCoreCodeGen
XCoreFrameInfo.cpp
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 5564ddf133ea..755ece7e9aba 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -56,6 +56,17 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
// Complex Pattern Selectors.
bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
@@ -151,17 +162,15 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
- if (Predicate_immMskBitp(N)) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
// Transformation function: get the size of a mask
- int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
- assert(isMask_32(MaskVal));
// Look for the first non-zero bit
- SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
- else if (! Predicate_immU16(N)) {
- unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ else if (!isUInt<16>(Val)) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
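For reference, a self-contained restatement of the immMskBitp predicate introduced above, with a couple of worked values. isMskBitp is a standalone name used here for illustration only; the in-tree version relies on LLVM's isMask_32 and CountLeadingZeros_32, while this sketch uses the GCC/Clang __builtin_clz instead.

#include <cstdint>

// A value qualifies when it is a low-bit mask (2^n - 1) whose width is
// 1..8, 16, 24 or 32, i.e. the widths the MKMSK_rus selection above accepts.
static bool isMskBitp(uint32_t value) {
  if (value == 0 || (value & (value + 1)) != 0)
    return false;                            // not of the form 2^n - 1
  int msksize = 32 - __builtin_clz(value);
  return (msksize >= 1 && msksize <= 8) ||
         msksize == 16 || msksize == 24 || msksize == 32;
}
// isMskBitp(0xFF)   -> true   (width 8)
// isMskBitp(0xFFF)  -> false  (width 12)
// isMskBitp(0xFFFF) -> true   (width 16)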
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index dd90ea976770..ad00046af17d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -46,33 +46,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSR, unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- // We look for 4 kinds of patterns here:
- // add dst, src, 0
- // sub dst, src, 0
- // or dst, src, src
- // and dst, src, src
- if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus)
- && isZeroImm(MI.getOperand(2))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r)
- && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
@@ -437,7 +410,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
it->getFrameIdx(), RC, &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
+ BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
}
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e5b0171579fc..d2b116eef0d8 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -30,12 +30,6 @@ public:
///
virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 19b9b1f8c00c..6b3b39ba1d49 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -140,17 +140,7 @@ def immU20 : PatLeaf<(imm), [{
return (uint32_t)N->getZExtValue() < (1 << 20);
}]>;
-def immMskBitp : PatLeaf<(imm), [{
- uint32_t value = (uint32_t)N->getZExtValue();
- if (!isMask_32(value)) {
- return false;
- }
- int msksize = 32 - CountLeadingZeros_32(value);
- return (msksize >= 1 && msksize <= 8)
- || msksize == 16
- || msksize == 24
- || msksize == 32;
-}]>;
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
def immBitp : PatLeaf<(imm), [{
uint32_t value = (uint32_t)N->getZExtValue();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 2a88342180e4..f82e59814e77 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -155,10 +155,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
@@ -291,7 +290,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Erase old instruction.
MBB.erase(II);
- return 0;
}
void
@@ -420,7 +418,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Show update of SP.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
@@ -439,7 +437,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveLRLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
MachineLocation CSSrc(XCore::LR);
MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
@@ -455,7 +453,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
MBB.addLiveIn(XCore::R10);
if (emitFrameMoves) {
MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveR10Label);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
MachineLocation CSSrc(XCore::R10);
MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
@@ -467,7 +465,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
// Show FP is now valid.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
MachineLocation SPDst(FramePtr);
MachineLocation SPSrc(MachineLocation::VirtualFP);
MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 66132ba8ff66..e636c1c7298a 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -54,9 +54,8 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;