Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.h | 4
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 64
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 629
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 87
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 191
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 24
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 174
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 58
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 1
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 551
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 893
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 47
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 44
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 128
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 99
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 23
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 63
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 194
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 87
-rw-r--r--  lib/Target/ARM/ARMScheduleA8.td | 84
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 364
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 2
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 30
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 23
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 59
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 24
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 5
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 113
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 8
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 42
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 4
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 51
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 125
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 24
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 17
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2HazardRecognizer.cpp | 53
-rw-r--r--  lib/Target/ARM/Thumb2HazardRecognizer.h | 40
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 160
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 192
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 39
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 16
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 39
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 2
-rw-r--r--  lib/Target/Alpha/AlphaInstrFormats.td | 4
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 109
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 27
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 34
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 3
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 13
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.h | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 108
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 15
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 12
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 30
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 6
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 50
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td | 82
-rw-r--r--  lib/Target/CellSPU/SPUFrameInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 126
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 3
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 131
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 33
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td | 2
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 57
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 13
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 3341
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 88
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 65
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 25
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 3
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 23
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 3
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 43
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 39
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 12
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 455
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 46
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 3
-rw-r--r--  lib/Target/Mangler.cpp | 2
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 55
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 213
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 25
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 35
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 3
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 46
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 37
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 13
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.td | 18
-rw-r--r--  lib/Target/PIC16/PIC16MemSelOpt.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 24
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.cpp | 7
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 124
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 9
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 184
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 30
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 166
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 3
-rw-r--r--  lib/Target/README.txt | 134
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 61
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 2
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 98
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 25
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 7
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 3
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 29
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 4
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 96
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 12
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 17
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 10
-rw-r--r--  lib/Target/TargetInstrInfo.cpp | 4
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 2
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmLexer.cpp | 85
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 105
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 21
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 11
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 7
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 23
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 15
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 74
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 4
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 15
-rw-r--r--  lib/Target/X86/README-SSE.txt | 184
-rw-r--r--  lib/Target/X86/README-X86-64.txt | 249
-rw-r--r--  lib/Target/X86/README.txt | 103
-rw-r--r--  lib/Target/X86/X86.h | 4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 8
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 12
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 20
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 441
-rw-r--r--  lib/Target/X86/X86FixupKinds.h | 1
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 111
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 17
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 196
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 902
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 43
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 238
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 39
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 16
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 90
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 336
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 642
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 164
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 369
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 14
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 6263
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 543
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 117
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 32
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 69
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 47
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 8
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 1
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 61
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 8
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 70
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 15
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 2
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 3
190 files changed, 12580 insertions, 10793 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ae7ae59c9262..14825a785649 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -90,10 +90,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
-/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
-/// operations involving sub-registers.
-bool ModelWithRegSequence();
-
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index e68354a42f8d..d316b13e0488 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -520,6 +520,70 @@ namespace ARM_AM {
// This is stored in two operands [regaddr, align]. The first is the
// address register. The second operand is the value of the alignment
// specifier to use or zero if no explicit alignment.
+ // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific
+ // instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
} // end namespace ARM_AM
} // end namespace llvm
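
[Editor's note, not part of the patch] For orientation, here is a minimal sketch of how the NEON modified-immediate helpers added above compose; it assumes it is compiled within the ARM target directory so the private ARMAddressingModes.h header is visible.

#include <cassert>
#include <cstdint>
#include "ARMAddressingModes.h"

using namespace llvm;

int main() {
  // Encode OpCmode = 0xe (8-bit elements) with an 8-bit immediate of 0xab.
  unsigned ModImm = ARM_AM::createNEONModImm(0xe, 0xab);
  assert(ARM_AM::getNEONModImmOpCmode(ModImm) == 0xe);
  assert(ARM_AM::getNEONModImmVal(ModImm) == 0xab);

  // Decode back to an element value and element size.
  unsigned EltBits = 0;
  uint64_t Val = ARM_AM::decodeNEONModImm(ModImm, EltBits);
  assert(EltBits == 8 && Val == 0xab);

  // OpCmode 0x1e: each bit of the 8-bit immediate expands to a 0xff byte of a
  // 64-bit element, so 0x81 decodes to 0xff000000000000ff.
  Val = ARM_AM::decodeNEONModImm(ARM_AM::createNEONModImm(0x1e, 0x81), EltBits);
  assert(EltBits == 64 && Val == 0xff000000000000ffULL);
  return 0;
}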
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2528854133e5..49c16f3e0720 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
- unsigned TSFlags = MI->getDesc().TSFlags;
+ uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return NULL;
@@ -199,9 +199,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
bool
ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -227,8 +227,9 @@ ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Insert the spill to the stack frame. The register is killed at the spill
//
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
storeRegToStackSlot(MBB, MI, Reg, isKill,
- CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+ CSI[i].getFrameIdx(), RC, TRI);
}
return true;
}
@@ -347,10 +348,8 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -364,17 +363,17 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
@@ -487,7 +486,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// Basic size info comes from the TSFlags field.
const TargetInstrDesc &TID = MI->getDesc();
- unsigned TSFlags = TID.TSFlags;
+ uint64_t TSFlags = TID.TSFlags;
unsigned Opc = MI->getOpcode();
switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
@@ -524,11 +523,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
- return 24;
+ return 20;
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 14;
+ return 12;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
@@ -595,6 +594,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
return true;
}
case ARM::MOVr:
+ case ARM::MOVr_TC:
case ARM::tMOVr:
case ARM::tMOVgpr2tgpr:
case ARM::tMOVtgpr2gpr:
@@ -693,75 +693,44 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-bool
-ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (DestRC == ARM::tGPRRegisterClass)
- DestRC = ARM::GPRRegisterClass;
- if (SrcRC == ARM::tGPRRegisterClass)
- SrcRC = ARM::GPRRegisterClass;
-
- // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
- if (DestRC == ARM::DPR_8RegisterClass)
- DestRC = ARM::DPR_VFP2RegisterClass;
- if (SrcRC == ARM::DPR_8RegisterClass)
- SrcRC = ARM::DPR_VFP2RegisterClass;
-
- // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
- if (DestRC == ARM::QPR_VFP2RegisterClass ||
- DestRC == ARM::QPR_8RegisterClass)
- DestRC = ARM::QPRRegisterClass;
- if (SrcRC == ARM::QPR_VFP2RegisterClass ||
- SrcRC == ARM::QPR_8RegisterClass)
- SrcRC = ARM::QPRRegisterClass;
-
- // Allow QQPR / QQPR_VFP2 cross-class copies.
- if (DestRC == ARM::QQPR_VFP2RegisterClass)
- DestRC = ARM::QQPRRegisterClass;
- if (SrcRC == ARM::QQPR_VFP2RegisterClass)
- SrcRC = ARM::QQPRRegisterClass;
-
- // Disallow copies of unequal sizes.
- if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
- return false;
-
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::SPRRegisterClass)
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg)
- .addReg(SrcReg));
- else
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
- DestReg).addReg(SrcReg)));
- } else {
- unsigned Opc;
-
- if (DestRC == ARM::SPRRegisterClass)
- Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS);
- else if (DestRC == ARM::DPRRegisterClass)
- Opc = ARM::VMOVD;
- else if (DestRC == ARM::DPR_VFP2RegisterClass ||
- SrcRC == ARM::DPR_VFP2RegisterClass)
- // Always use neon reg-reg move if source or dest is NEON-only regclass.
- Opc = ARM::VMOVDneon;
- else if (DestRC == ARM::QPRRegisterClass)
- Opc = ARM::VMOVQ;
- else if (DestRC == ARM::QQPRRegisterClass)
- Opc = ARM::VMOVQQ;
- else if (DestRC == ARM::QQQQPRRegisterClass)
- Opc = ARM::VMOVQQQQ;
- else
- return false;
-
- AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
}
- return true;
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQ;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
}
static const
@@ -795,30 +764,34 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
// FIXME: Neon instructions should support predicates
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
+ .addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
@@ -828,12 +801,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
- } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
- .addFrameIndex(FI).addImm(128);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q))
+ .addFrameIndex(FI).addImm(16);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -850,8 +825,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
}
- } else {
- assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ break;
+ case ARM::QQQQPRRegClassID: {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
@@ -865,6 +840,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -886,26 +865,30 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
- .addFrameIndex(FI).addImm(128)
+ .addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
@@ -913,14 +896,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
- } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+ AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
@@ -932,21 +917,25 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
}
- } else {
- assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -960,223 +949,6 @@ ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
return &*MIB;
}
-MachineInstr *ARMBaseInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead())
- return NULL;
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- }
- } else if (Opc == ARM::VMOVS) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI)
- .addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::VMOVQ) {
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (MFI.getObjectAlignment(FI) >= 16 &&
- getRegisterInfo().canRealignStack(MF)) {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addImm(Pred).addReg(PredReg);
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addImm(Pred).addReg(PredReg);
- }
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (MFI.getObjectAlignment(FI) >= 16 &&
- getRegisterInfo().canRealignStack(MF)) {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addImm(Pred).addReg(PredReg);
- }
- }
- }
-
- return NewMI;
-}
-
-MachineInstr*
-ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- // FIXME
- return 0;
-}
-
-bool
-ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- return MI->getOperand(4).getReg() != ARM::CPSR ||
- MI->getOperand(4).isDead();
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- return true;
- } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
- Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
- return true;
- }
-
- // FIXME: VMOVQQ and VMOVQQQQ?
-
- return false;
-}
-
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
@@ -1211,17 +983,12 @@ reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
-
+ const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
default: {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
@@ -1237,9 +1004,6 @@ reMaterialize(MachineBasicBlock &MBB,
break;
}
}
-
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
}
MachineInstr *
@@ -1291,6 +1055,165 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only difference
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled together. On some targets, if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
+ if (!NumInstrs)
+ return false;
+ if (Subtarget.getCPUString() == "generic")
+ // Generic (and overly aggressive) if-conversion limits for testing.
+ return NumInstrs <= 10;
+ else if (Subtarget.hasV7Ops())
+ return NumInstrs <= 3;
+ return NumInstrs <= 2;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF) const {
+ return NumT && NumF && NumT <= 2 && NumF <= 2;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
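
[Editor's note, not part of the patch] The intended call pattern for the two new scheduler hooks is easier to see outside the diff. The fragment below is a hypothetical sketch of how a pre-register-allocation scheduler might combine them; TII, Load1, Load2, and NumClusteredLoads are made-up names, not code from this commit.

// Assume: const ARMBaseInstrInfo *TII; SDNode *Load1, *Load2; both are
// machine-opcode load nodes produced during selection DAG scheduling.
int64_t Offset1 = 0, Offset2 = 0;
unsigned NumClusteredLoads = 0;  // loads already scheduled after Load1

if (TII->areLoadsFromSameBasePtr(Load1, Load2, Offset1, Offset2) &&
    Offset1 < Offset2 &&  // shouldScheduleLoadsNear asserts Offset2 > Offset1
    TII->shouldScheduleLoadsNear(Load1, Load2, Offset1, Offset2,
                                 NumClusteredLoads)) {
  // Same base register, nearby offsets, and the cluster is still small:
  // keep Load2 adjacent to Load1 in the schedule.
  ++NumClusteredLoads;
}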
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b566271c8db9..89a2db74a75e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -116,11 +116,25 @@ namespace ARMII {
// Thumb format
ThumbFrm = 24 << FormShift,
- // NEON format
- NEONFrm = 25 << FormShift,
- NEONGetLnFrm = 26 << FormShift,
- NEONSetLnFrm = 27 << FormShift,
- NEONDupFrm = 28 << FormShift,
+ // Miscellaneous format
+ MiscFrm = 25 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 26 << FormShift,
+ NSetLnFrm = 27 << FormShift,
+ NDupFrm = 28 << FormShift,
+ NLdStFrm = 29 << FormShift,
+ N1RegModImmFrm= 30 << FormShift,
+ N2RegFrm = 31 << FormShift,
+ NVCVTFrm = 32 << FormShift,
+ NVDupLnFrm = 33 << FormShift,
+ N2RegVShLFrm = 34 << FormShift,
+ N2RegVShRFrm = 35 << FormShift,
+ N3RegFrm = 36 << FormShift,
+ N3RegVShFrm = 37 << FormShift,
+ NVExtFrm = 38 << FormShift,
+ NVMulSLFrm = 39 << FormShift,
+ NVTBLFrm = 40 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -213,7 +227,8 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -258,12 +273,10 @@ public:
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -283,29 +296,51 @@ public:
const MDNode *MDPtr,
DebugLoc DL) const;
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const;
-
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// difference between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled together. On some targets, if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
+ MachineBasicBlock &FMBB,unsigned NumF) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const {
+ return NumInstrs && NumInstrs == 1;
+ }
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 82458d2347cc..182bd9937145 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -170,56 +170,6 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
}
-const TargetRegisterClass* const *
-ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={
- &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- if (STI.isThumb1Only()) {
- return STI.isTargetDarwin()
- ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
- }
- return STI.isTargetDarwin()
- ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
-}
-
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this everytime.
@@ -352,7 +302,7 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
bool
-ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
unsigned &NewSubIdx) const {
@@ -724,6 +674,15 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
I != E; ++I) {
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
case ARMII::AddrMode3:
case ARMII::AddrModeT2_i8:
@@ -765,6 +724,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS1GPRs;
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register.
@@ -780,7 +740,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
- const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -798,50 +757,50 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
- if (CSRegClasses[i] == ARM::GPRRegisterClass ||
- CSRegClasses[i] == ARM::tGPRRegisterClass) {
- if (Spilled) {
- NumGPRSpills++;
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
- if (!STI.isTargetDarwin()) {
- if (Reg == ARM::LR)
- LRSpilled = true;
- CS1Spilled = true;
- continue;
- }
+ if (Spilled) {
+ NumGPRSpills++;
- // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
- switch (Reg) {
- case ARM::LR:
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
LRSpilled = true;
- // Fallthrough
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- CS1Spilled = true;
- break;
- default:
- break;
- }
- } else {
- if (!STI.isTargetDarwin()) {
- UnspilledCS1GPRs.push_back(Reg);
- continue;
- }
+ CS1Spilled = true;
+ continue;
+ }
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- UnspilledCS1GPRs.push_back(Reg);
- break;
- default:
- UnspilledCS2GPRs.push_back(Reg);
- break;
- }
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
}
}
}
@@ -862,9 +821,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// offset, make sure a register (or a spill slot) is available for the
// register scavenger. Note that if we're indexing off the frame pointer, the
// effective stack size is 4 bytes larger since the FP points to the stack
- // slot of the previous FP.
- bool BigStack = RS &&
- estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF);
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
+ estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -957,7 +923,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = ARM::GPRRegisterClass;
- MachineFrameInfo *MFI = MF.getFrameInfo();
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
@@ -1622,6 +1587,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = prior(MBB.end());
assert(MBBI->getDesc().isReturn() &&
"Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1696,6 +1662,39 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size());
}
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else if (RetOpcode == ARM::TCRETURNdiND) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ }
+
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
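
[Editor's note, not part of the patch] As background for the ADDri clamp added in the estimateRSStackSizeLimit hunk above (where the limit drops to 255): ARM data-processing immediates are an 8-bit value rotated by an even amount, so 255 is the largest offset guaranteed to encode, even though some larger values happen to fit. A small illustrative check follows, assuming ARMAddressingModes.h and its existing ARM_AM::getSOImmVal helper are available.

#include <cassert>
#include "ARMAddressingModes.h"

using namespace llvm;

int main() {
  // Every value 0..255 is trivially encodable (rotation 0).
  for (unsigned Off = 0; Off <= 255; ++Off)
    assert(ARM_AM::getSOImmVal(Off) != -1);

  // 0x100 happens to encode (0x1 rotated), but 0x101 does not, which is why
  // the scavenging limit is conservatively clamped to 255 for ADDri.
  assert(ARM_AM::getSOImmVal(0x100) != -1);
  assert(ARM_AM::getSOImmVal(0x101) == -1);
  return 0;
}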
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 2c9c82d0318b..f7ee0d5cc66d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -69,9 +69,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
/// getMatchingSuperRegClass - Return a subclass of the specified register
@@ -81,14 +78,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
- /// canCombinedSubRegIndex - Given a register class and a list of sub-register
- /// indices, return true if it's possible to combine the sub-register indices
- /// into one that corresponds to a larger sub-register. Return the new sub-
- /// register index by reference. Note the new index by be zero if the given
- /// sub-registers combined to form the whole register.
- virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const;
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
@@ -150,8 +148,8 @@ public:
virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, FrameIndexValue *Value = NULL,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f2730fc8a5cb..7895cb071922 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -55,6 +55,7 @@ namespace {
const std::vector<MachineConstantPoolEntry> *MCPEs;
const std::vector<MachineJumpTableEntry> *MJTEs;
bool IsPIC;
+ bool IsThumb;
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfo>();
@@ -67,8 +68,8 @@ namespace {
: MachineFunctionPass(&ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
- MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
@@ -139,6 +140,12 @@ namespace {
void emitMiscInstruction(const MachineInstr &MI);
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
@@ -147,7 +154,8 @@ namespace {
}
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
- /// machine operand requires relocation, record the relocation and return zero.
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
unsigned Reloc);
unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
@@ -193,6 +201,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MJTEs = 0;
if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
JTI->Initialize(MF, IsPIC);
MMI = &getAnalysis<MachineModuleInfo>();
MCE.setModuleInfo(MMI);
@@ -347,7 +356,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
MCE.processDebugLoc(MI.getDebugLoc(), true);
- NumEmitted++; // Keep track of the # of mi's emitted
+ ++NumEmitted; // Keep track of the # of mi's emitted
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
llvm_unreachable("Unhandled instruction encoding format!");
@@ -407,6 +416,23 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::VFPMiscFrm:
emitMiscInstruction(MI);
break;
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
}
MCE.processDebugLoc(MI.getDebugLoc(), false);
}
@@ -1539,4 +1565,144 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = ARMRegisterInfo::getRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = ARMRegisterInfo::getRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = ARMRegisterInfo::getRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const TargetInstrDesc &TID = MI.getDesc();
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
#include "ARMGenCodeEmitter.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 13d8b74014c5..65a3da6f1617 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -337,7 +337,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
-
+
// Clear NewWaterList now. If we split a block for branches, it should
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
@@ -361,8 +361,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// After a while, this might be made debug-only, but it is not expensive.
verify(MF);
- // If LR has been forced spilled and no far jumps (i.e. BL) has been issued.
- // Undo the spill / restore of LR if possible.
+ // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
@@ -407,7 +407,7 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
- NumCPEs++;
+ ++NumCPEs;
DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
<< "\n");
}
@@ -418,7 +418,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
- if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
return false;
MachineBasicBlock *NextBB = llvm::next(MBBI);
@@ -491,6 +492,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
// Add instruction size to MBBSize.
MBBSize += TII->GetInstSizeInBytes(I);
@@ -722,7 +725,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// correspond to anything in the source.
unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
- NumSplit++;
+ ++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
while (!OrigBB->succ_empty()) {
@@ -945,7 +948,7 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
if (--CPE->RefCount == 0) {
RemoveDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
- NumCPEs--;
+ --NumCPEs;
return true;
}
return false;
@@ -1246,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
- NumCPEs++;
+ ++NumCPEs;
BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
// Compensate for .align 2 in thumb mode.
@@ -1369,7 +1372,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
BBSizes[MBB->getNumber()] += 2;
AdjustBBOffsetsAfter(MBB, 2);
HasFarJump = true;
- NumUBrFixed++;
+ ++NumUBrFixed;
DEBUG(errs() << " Changed B to long jump " << *MI);
@@ -1402,7 +1405,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
- NumCBrFixed++;
+ ++NumCBrFixed;
if (BMI != MI) {
if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
@@ -1621,7 +1624,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
// constantpool tables?
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
if (MJTI == 0) return false;
-
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1658,15 +1661,25 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
continue;
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
MachineBasicBlock::iterator PrevI = MI;
- if (PrevI == MBB->begin())
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
continue;
- MachineInstr *AddrMI = --PrevI;
+ MachineInstr *AddrMI = PrevI;
bool OptOk = true;
- // Examine the instruction that calculate the jumptable entry address.
- // If it's not the one just before the t2BR_JT, we won't delete it, then
- // it's not worth doing the optimization.
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
const MachineOperand &MO = AddrMI->getOperand(k);
if (!MO.isReg() || !MO.getReg())
@@ -1683,9 +1696,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
- // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
// to delete it as well.
- MachineInstr *LeaMI = --PrevI;
+ MachineInstr *LeaMI = PrevI;
if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
LeaMI->getOperand(0).getReg() != BaseReg)
@@ -1729,7 +1747,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
if (MJTI == 0) return false;
-
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1769,7 +1787,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
MachineFunction &MF = *BB->getParent();
- // If it's the destination block is terminated by an unconditional branch,
+ // If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
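
A minimal sketch (using simplified stand-ins for MachineInstr and the block iterators) of the backward scan introduced in OptimizeThumb2JumpTables above: walk from the branch toward the start of the block until an instruction defining the base register is found, and give up if none is.

#include <cstdio>
#include <vector>

struct Instr {
  int DefReg; // register this instruction defines, or -1 for none
  bool definesRegister(int Reg) const { return DefReg == Reg; }
};

// Returns the index of the defining instruction, or -1 if not found.
static int findDefBackwards(const std::vector<Instr> &MBB, int BranchIdx,
                            int BaseReg) {
  int I = BranchIdx;
  while (I != 0 && !MBB[I].definesRegister(BaseReg))
    --I;
  return MBB[I].definesRegister(BaseReg) ? I : -1;
}

int main() {
  std::vector<Instr> MBB = {{5}, {7}, {3}, {-1}}; // r7 defined at index 1
  printf("def of r7 at %d\n", findDefBackwards(MBB, 3, 7)); // 1
  printf("def of r9 at %d\n", findDefBackwards(MBB, 3, 9)); // -1
  return 0;
}
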
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6f4eddf7361d..3119b54563de 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include <cstddef>
namespace llvm {
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c87f5d7c16a5..9c62597b4323 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -144,13 +144,15 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ .addReg(EvenDst,
+ getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ .addReg(OddDst,
+ getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
Modified = true;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9baef6bf1243..c84d3ff81324 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMTargetMachine.h"
@@ -35,11 +36,6 @@
using namespace llvm;
-static cl::opt<bool>
-UseRegSeq("neon-reg-sequence", cl::Hidden,
- cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
- cl::init(true));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -147,6 +143,11 @@ private:
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1);
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -173,24 +174,17 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// PairDRegs - Form a quad register from a pair of D registers.
- ///
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
-
- /// PairDRegs - Form a quad register pair from a pair of Q registers.
- ///
SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
- /// QuadDRegs - Form a quad register pair from a quad of D registers.
- ///
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
-
- /// QuadQRegs - Form 4 consecutive Q registers.
- ///
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- /// OctoDRegs - Form 8 consecutive D registers.
- ///
+ // Form sequences of 8 consecutive D registers.
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
@@ -544,10 +538,9 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = Op->getDebugLoc();
if (N.getOpcode() != ISD::ADD) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || NC->getZExtValue() != 0)
+ if (!NC || !NC->isNullValue())
return false;
Base = Offset = N;
@@ -788,8 +781,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
if (N.getOpcode() == ISD::ADD) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
+ // 8 bits.
if (((RHSC & 0x3) == 0) &&
- ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits.
+ ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -798,7 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
} else if (N.getOpcode() == ISD::SUB) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
- if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits.
+ // 8 bits.
+ if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
return true;
@@ -960,22 +955,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
/// PairDRegs - Form a quad register from a pair of D registers.
///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- if (llvm::ModelWithRegSequence()) {
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
- }
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0);
- SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VT, Undef, V0, SubReg0);
- return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VT, SDValue(Pair, 0), V1, SubReg1);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
@@ -988,6 +985,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
/// QuadDRegs - Form 4 consecutive D registers.
///
SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
@@ -1088,7 +1098,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+ if (NumVecs < 2)
return VLd;
SDValue RegSeq;
@@ -1129,24 +1139,17 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
- if (llvm::ModelWithRegSequence()) {
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
+ if (NumVecs == 1) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
} else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+ SDValue(VLd, 0), SDValue(VLd, 1),
+ SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+ SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+ SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+ ReplaceUses(SDValue(N, 0), Q0);
+ ReplaceUses(SDValue(N, 1), Q1);
}
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1169,37 +1172,27 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
- if (llvm::ModelWithRegSequence()) {
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
- 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
- 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
- } else {
- // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ SDValue V0 = SDValue(VLdA, 0);
+ SDValue V1 = SDValue(VLdB, 0);
+ SDValue V2 = SDValue(VLdA, 1);
+ SDValue V3 = SDValue(VLdB, 1);
+ SDValue V4 = SDValue(VLdA, 2);
+ SDValue V5 = SDValue(VLdB, 2);
+ SDValue V6 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdA, 3);
+ SDValue V7 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdB, 3);
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+ V4, V5, V6, V7), 0);
+
+ // Extract out the 3 / 4 Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), Q);
}
}
ReplaceUses(SDValue(N, NumVecs), Chain);
@@ -1209,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1247,7 +1240,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Align);
if (is64BitVector) {
- if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+ if (NumVecs >= 2) {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1292,7 +1285,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Quad registers are directly supported for VST1 and VST2,
// storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+ if (NumVecs == 2) {
// First extract the pair of Q registers.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
@@ -1330,76 +1323,48 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
- if (llvm::ModelWithRegSequence()) {
- // Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
-
- // Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
- // Store the odd D registers.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
- } else {
- Ops.push_back(Reg0); // post-access address offset
-
- // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
-
- // Store the odd subregs.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
- }
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3));
+ V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
+
+ // Store the even D registers.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ Ops.push_back(Reg0); // post-access address offset
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+ RegVT, RegSeq));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+ RegVT, RegSeq);
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1421,13 +1386,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
// Quad registers are handled by load/store of subregs. Find the subreg info.
unsigned NumElts = 0;
- int SubregIdx = 0;
bool Even = false;
EVT RegVT = VT;
if (!is64BitVector) {
RegVT = GetNEONSubregVT(VT);
NumElts = RegVT.getVectorNumElements();
- SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1;
Even = Lane < NumElts;
}
@@ -1455,35 +1418,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = 0;
if (is64BitVector) {
Opc = DOpcodes[OpcodeIndex];
- if (llvm::ModelWithRegSequence()) {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
} else {
// Check if this is loading the even or odd subreg of a Q register.
if (Lane < NumElts) {
@@ -1493,31 +1447,24 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
Opc = QOpcodes1[OpcodeIndex];
}
- if (llvm::ModelWithRegSequence()) {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- }
-
- // Extract the subregs of the input vector.
- unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
- RegSeq));
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Extract the subregs of the input vector.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
- N->getOperand(Vec+3)));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+
+ // Extract the subregs of the input vector.
+ unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
+ RegSeq));
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
@@ -1531,76 +1478,97 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ResTys.push_back(MVT::Other);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
- if (llvm::ModelWithRegSequence()) {
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- if (is64BitVector) {
- SDValue V0 = SDValue(VLdLn, 0);
- SDValue V1 = SDValue(VLdLn, 1);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = SDValue(VLdLn, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLdLn, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ if (is64BitVector) {
+ SDValue V0 = SDValue(VLdLn, 0);
+ SDValue V1 = SDValue(VLdLn, 1);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- if (Even) {
- V[i] = SDValue(VLdLn, Vec);
- V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- } else {
- V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- V[i+1] = SDValue(VLdLn, Vec);
- }
+ SDValue V2 = SDValue(VLdLn, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLdLn, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // For 128-bit vectors, take the 64-bit results of the load and insert
+ // them as subregs into the result.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ if (Even) {
+ V[i] = SDValue(VLdLn, Vec);
+ V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ } else {
+ V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ V[i+1] = SDValue(VLdLn, Vec);
}
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- if (NumVecs == 2)
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
- else
- RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
}
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
- return NULL;
- }
-
- // For a 64-bit vector load to D registers, nothing more needs to be done.
- if (is64BitVector)
- return VLdLn;
-
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
- N->getOperand(Vec+3),
- SDValue(VLdLn, Vec));
- ReplaceUses(SDValue(N, Vec), QuadVec);
+ if (NumVecs == 2)
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
+ else
+ RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
}
- Chain = SDValue(VLdLn, NumVecs);
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
+
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
bool isSigned) {
if (!Subtarget->hasV6T2Ops())
@@ -1954,8 +1922,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
- SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5);
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
@@ -2015,7 +1983,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
@@ -2029,7 +1997,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
@@ -2211,6 +2179,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT.getSimpleVT() == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT.getSimpleVT() == MVT::f32 &&
+ "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
@@ -2342,6 +2326,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4);
+ }
+ break;
+ }
+
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
}
@@ -2367,9 +2374,3 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
-
-/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
-/// operations involving sub-registers.
-bool llvm::ModelWithRegSequence() {
- return UseRegSeq;
-}
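
As a side note, the offset constraint checked in SelectT2AddrModeImm8s4 above amounts to accepting word-aligned offsets whose magnitude fits in 8 bits after scaling by 4, i.e. multiples of 4 in (-1024, 1024). A standalone sketch of that predicate:

#include <cstdio>

// Mirrors the condition in SelectT2AddrModeImm8s4: offset must be a multiple
// of 4 and fit in an 8-bit field after scaling.
static bool isValidImm8s4Offset(int Offset) {
  return ((Offset & 0x3) == 0) &&
         ((Offset >= 0 && Offset < 0x400) || (Offset < 0 && Offset > -0x400));
}

int main() {
  printf("1020 -> %d\n", isValidImm8s4Offset(1020)); // 1: largest positive
  printf("1024 -> %d\n", isValidImm8s4Offset(1024)); // 0: out of range
  printf("-6   -> %d\n", isValidImm8s4Offset(-6));   // 0: not word-aligned
  return 0;
}
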
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b8126a3c5d18..98d8b8585428 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
@@ -40,6 +41,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -47,9 +49,27 @@
#include <sstream>
using namespace llvm;
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(true));
+
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
- cl::desc("Generate calls via indirect call instructions."),
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableARMCodePlacement("arm-code-placement", cl::Hidden,
+ cl::desc("Enable code placement pass for ARM"),
cl::init(false));
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
@@ -94,10 +114,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- if (llvm::ModelWithRegSequence())
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- else
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
@@ -393,13 +410,57 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
-
- // If the subtarget does not have extract instructions, sign_extend_inreg
- // needs to be expanded. Extract is available in ARM mode on v6 and up,
- // and on most Thumb2 implementations.
- if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
- || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
+ // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
+ // use the default expansion.
+ bool canHandleAtomics =
+ (Subtarget->hasV7Ops() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
+ if (canHandleAtomics) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now)
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
@@ -412,8 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ }
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -474,28 +537,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
else
setSchedulingPreference(Sched::Hybrid);
- // FIXME: If-converter should use instruction latency to determine
- // profitability rather than relying on fixed limits.
- if (Subtarget->getCPUString() == "generic") {
- // Generic (and overly aggressive) if-conversion limits.
- setIfCvtBlockSizeLimit(10);
- setIfCvtDupBlockSizeLimit(2);
- } else if (Subtarget->hasV7Ops()) {
- setIfCvtBlockSizeLimit(3);
- setIfCvtDupBlockSizeLimit(1);
- } else if (Subtarget->hasV6Ops()) {
- setIfCvtBlockSizeLimit(2);
- setIfCvtDupBlockSizeLimit(1);
- } else {
- setIfCvtBlockSizeLimit(3);
- setIfCvtDupBlockSizeLimit(2);
- }
-
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
- // Do not enable CodePlacementOpt for now: it currently runs after the
- // ARMConstantIslandPass and messes up branch relaxation and placement
- // of constant islands.
- // benefitFromCodePlacementOpt = true;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ if (EnableARMCodePlacement)
+ benefitFromCodePlacementOpt = true;
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -537,6 +586,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
@@ -581,6 +632,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
}
@@ -603,15 +655,33 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
- return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
+ return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT.isFloatingPoint() || VT.isVector())
return Sched::Latency;
}
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Loads are scheduled for latency even if the instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.mayLoad())
+ return Sched::Latency;
+
+ const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
+ if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+ return Sched::Latency;
return Sched::RegPressure;
}
@@ -964,11 +1034,28 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- // ARM target does not yet support tail call optimization.
- isTailCall = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Temporarily disable tail calls so things don't break.
+ if (!EnableARMTailCalls)
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -981,9 +1068,14 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -996,7 +1088,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[realArgIdx].Val;
+ SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
// Promote the value if needed.
@@ -1044,7 +1136,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
+ } else if (!IsSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1059,10 +1151,32 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ // Tail call byval lowering might overwrite argument registers, so in the case
+ // of tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1071,7 +1185,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
- MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
@@ -1117,7 +1230,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
@@ -1134,7 +1247,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
- Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
bool isStub = Subtarget->isTargetDarwin() &&
@@ -1171,11 +1284,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
: ARMISD::CALL_NOLINK;
}
- if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
- // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
- Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
- InFlag = Chain.getValue(1);
- }
std::vector<SDValue> Ops;
Ops.push_back(Chain);
@@ -1189,9 +1297,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
- &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -1205,10 +1317,203 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
dl, DAG, InVals);
}
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
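As an illustration of what MatchingStackOffset accepts, here is a hypothetical source-level sketch (the function names are invented, not taken from the patch): when a caller simply forwards its own stack-passed argument to the tail callee, the outgoing value is a load from a fixed stack object with the same offset and size, so no copy is needed and the sibcall can reuse the caller's slot.

  int callee(int a, int b, int c, int d, int e);

  int forwards(int a, int b, int c, int d, int e) {
    // Under AAPCS the fifth integer argument lives on the stack; it is passed
    // through unchanged, so the outgoing slot already matches the caller's
    // incoming fixed stack object.
    return callee(a, b, c, d, e);
  }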
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site passes no
+ // arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them.
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // For the moment, we can only do this to functions defined in this
+ // compilation, or to indirect calls. A Thumb B to an ARM function,
+ // or vice versa, is not easily fixed up in the linker unlike BL.
+ // (We could do this by loading the address of the callee into a register;
+ // that is an extra instruction over the direct call and burns a register
+ // as well, so is not likely to be a win.)
+
+ // It might be safe to remove this restriction on non-Darwin.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (isa<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ if (GV->isDeclaration() || GV->isWeakForLinker())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, getTargetMachine(),
+ RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, getTargetMachine(),
+ RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
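A few hedged source-level examples of what this predicate accepts and rejects on a non-Thumb1 target (all function names are invented for illustration):

  // Accepted: matching calling conventions, callee defined in this
  // translation unit, no struct return, not vararg.
  static int local(int x) { return x + 1; }
  int good(int x) { return local(x); }        // may become a sibcall

  // Rejected: the callee is only a declaration (a Thumb B to an ARM function,
  // or vice versa, cannot be fixed up by the linker the way BL can).
  int external_decl(int x);
  int bad1(int x) { return external_decl(x); }

  // Rejected: vararg call that actually passes arguments.
  int vf(int, ...);
  int bad2(int x) { return vf(x, x); }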
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
@@ -1239,7 +1544,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Arg = Outs[realRVLocIdx].Val;
+ SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -1477,7 +1782,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// pair. This is always cheaper.
if (Subtarget->useMovt()) {
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, PtrVT));
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1552,9 +1857,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- SDValue Val = Subtarget->isThumb() ?
- DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
- DAG.getConstant(0, MVT::i32);
+ SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
Op.getOperand(1), Val);
}
@@ -1568,8 +1871,7 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget)
- const {
+ const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
@@ -1597,7 +1899,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0,
false, false, 0);
- SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1609,25 +1910,21 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+ const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
- SDValue Res;
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- if (isDeviceBarrier) {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- } else {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
- return Res;
+ // v6 and v7 can both handle barriers directly, but need to be handled a bit
+ // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+ // never get here.
+ unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
+ if (Subtarget->hasV7Ops())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
+ else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return SDValue();
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1712,7 +2009,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1768,8 +2065,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
- int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0,
@@ -1836,8 +2132,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1868,7 +2163,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
AFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(VARegSaveSize,
ArgOffset + VARegSaveSize - VARegSize,
- true, false));
+ true));
SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
getPointerTy());
@@ -1884,8 +2179,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0,
- false, false, 0);
+ PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
+ 0, false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -1895,8 +2190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
- true, false));
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
}
return Chain;
@@ -1978,9 +2272,44 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
}
+static bool canBitcastToInt(SDNode *Op) {
+ return Op->hasOneUse() &&
+ ISD::isNormalLoad(Op) &&
+ Op->getValueType(0) == MVT::f32;
+}
+
+static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(),
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
-static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (UnsafeFPMath && FiniteOnlyFPMath() &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE) &&
+ canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+ LHS = bitcastToInt(LHS, DAG);
+ RHS = bitcastToInt(RHS, DAG);
+ return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+ }
+
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
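To make the optimization above concrete, here is a hedged source-level sketch (the function is invented): when unsafe, finite-only FP math is enabled and both operands of an (in)equality are single-use f32 loads, the comparison can be performed on the raw 32-bit patterns with an ordinary integer compare instead of a VFP compare plus a flags transfer.

  bool bitsEqual(const float *a, const float *b) {
    // Both operands are plain f32 loads with a single use and the predicate
    // is ==, so getVFPCmp may rewrite this as i32 loads plus an integer CMP.
    return *a == *b;
  }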
@@ -2010,13 +2339,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMCC, CCR, Cmp);
+ ARMCC, CCR, Cmp);
if (CondCode2 != ARMCC::AL) {
SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
Result, TrueVal, ARMCC2, CCR, Cmp2);
}
@@ -2043,8 +2372,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
@@ -2132,7 +2461,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
-static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
@@ -2140,8 +2469,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
+ SDValue Cmp = getVFPCmp(Tmp1, FP0,
+ ISD::SETLT, ARMCC, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
@@ -2206,7 +2537,8 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
// Turn f64->i64 into VMOVRRD.
@@ -2516,76 +2848,149 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
-/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
-/// VMOV instruction, and if so, return the constant being splatted.
-static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
- unsigned SplatBitSize, SelectionDAG &DAG) {
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return either the constant being
+/// splatted or the encoded value, depending on the DoEncode parameter.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ bool isVMOV, bool DoEncode) {
+ unsigned OpCmode, Imm;
+ EVT VT;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions other than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
switch (SplatBitSize) {
case 8:
- // Any 1-byte value is OK.
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
- return DAG.getTargetConstant(SplatBits, MVT::i8);
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = MVT::i8;
+ break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i16);
- break;
+ VT = MVT::i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
case 32:
// NEON's 32-bit VMOV supports splat values where:
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0 ||
- (SplatBits & ~0xff0000) == 0 ||
- (SplatBits & ~0xff000000) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i32);
+ VT = MVT::i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
if ((SplatBits & ~0xffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xff) == 0xff)
- return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
if ((SplatBits & ~0xffffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
- return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
// Note: there are a few 32-bit splat values (specifically: 00ffff00,
// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
// VMOV.I32. A (very) minor optimization would be to replicate the value
// and fall through here to test for a valid 64-bit splat. But, then the
// caller would also need to check and handle the change in size.
- break;
+ return SDValue();
case 64: {
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ if (!isVMOV)
+ return SDValue();
uint64_t BitMask = 0xff;
uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if (((SplatBits | SplatUndef) & BitMask) == BitMask)
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
- else if ((SplatBits & BitMask) != 0)
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
return SDValue();
+ }
BitMask <<= 8;
+ ImmMask <<= 1;
}
- return DAG.getTargetConstant(Val, MVT::i64);
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = MVT::i64;
+ break;
}
default:
- llvm_unreachable("unexpected size for isVMOVSplat");
- break;
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
}
- return SDValue();
+ if (DoEncode) {
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+ }
+ return DAG.getTargetConstant(SplatBits, VT);
}
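A worked example of the encoding path, as a sketch only (it assumes ARM_AM::createNEONModImm packs the value as (OpCmode << 8) | Imm, which is how the result appears to be consumed elsewhere in the patch): a 16-bit splat of 0x4200 falls into the 0xnn00 case, so OpCmode is 0xa and Imm is 0x42.

  // Hypothetical stand-alone illustration of the DoEncode case.
  unsigned OpCmode = 0xa;                 // Cmode=101x for a 0xnn00 splat
  unsigned Imm     = 0x4200 >> 8;         // 0x42
  unsigned Encoded = ARM_AM::createNEONModImm(OpCmode, Imm);
  // Encoded == 0xa42 under the assumed (OpCmode << 8) | Imm packing.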
-/// getVMOVImm - If this is a build_vector of constants which can be
-/// formed by using a VMOV instruction of the specified element size,
-/// return the constant being splatted. The ByteSize field indicates the
-/// number of bytes of each element [1248].
-SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+/// getNEONModImm - If this is a valid vector constant for a NEON instruction
+/// with a "modified immediate" operand (e.g., VMOV) of the specified element
+/// size, return the encoded value for that immediate. The ByteSize field
+/// indicates the number of bytes of each element [1248].
+SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG) {
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
@@ -2597,8 +3002,8 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
if (SplatBitSize > ByteSize * 8)
return SDValue();
- return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG);
+ return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, isVMOV, true);
}
static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@@ -2838,8 +3243,10 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
- SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize, DAG);
+ // Check if an immediate VMOV works.
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, true, false);
if (Val.getNode())
return BuildSplat(Val, VT, DAG, dl);
}
@@ -2883,21 +3290,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
// Vectors with 32- or 64-bit elements can be built by directly assigning
- // the subregisters.
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
- SDValue Val = DAG.getUNDEF(VecVT);
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Elt = Op.getOperand(i);
- if (Elt.getOpcode() == ISD::UNDEF)
- continue;
- Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
- DAG.getConstant(i, MVT::i32));
- }
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
@@ -2934,7 +3337,9 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool ReverseVEXT;
unsigned Imm, WhichResult;
- return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
@@ -3032,59 +3437,62 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of the same time so that they get CSEd properly.
SVN->getMask(ShuffleMask);
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1) Lane = 0;
-
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
}
- return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i32));
- }
- bool ReverseVEXT;
- unsigned Imm;
- if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
- if (ReverseVEXT)
- std::swap(V1, V2);
- return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
- DAG.getConstant(Imm, MVT::i32));
- }
-
- if (isVREVMask(ShuffleMask, VT, 64))
- return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 32))
- return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 16))
- return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
-
- // Check for Neon shuffles that modify both input vectors in place.
- // If both results are used, i.e., if there are two shuffles with the same
- // source operands and with masks corresponding to both results of one of
- // these operations, DAG memoization will ensure that a single node is
- // used for both shuffles.
- unsigned WhichResult;
- if (isVTRNMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVUZPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVZIPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
- if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
@@ -3108,8 +3516,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
- // Implement shuffles with 32- or 64-bit elements as subreg copies.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
@@ -3117,17 +3524,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
- SDValue Val = DAG.getUNDEF(VecVT);
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
- continue;
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- ShuffleMask[i] < (int)NumElts ? V1 : V2,
- DAG.getConstant(ShuffleMask[i] & (NumElts-1),
- MVT::i32));
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
- Elt, DAG.getConstant(i, MVT::i32));
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
}
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
@@ -3277,7 +3684,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
@@ -3315,7 +3727,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ...
BB = exitMBB;
- MF->DeleteMachineInstr(MI); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
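The block-splitting idiom introduced here (and repeated in the custom inserters below) can be summarized on its own. This is only a sketch of the pattern the patch uses, with placeholder names: everything after the pseudo instruction moves into a new exit block, the successor edges (and any PHIs in the successors) are rewired, and the pseudo is erased once it has been expanded.

  MachineFunction *MF = BB->getParent();
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(BB->getBasicBlock());
  MF->insert(llvm::next(MachineFunction::iterator(BB)), exitMBB);
  // Move the tail of BB (everything after MI) into exitMBB and take over
  // BB's successors, updating PHIs in those successors.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  // ... build the expansion in BB (adding exitMBB and any new blocks as
  // successors as appropriate) ...
  MI->eraseFromParent();                  // the pseudo instruction is gone now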
@@ -3358,7 +3770,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
@@ -3403,7 +3820,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// ...
BB = exitMBB;
- MF->DeleteMachineInstr(MI); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
@@ -3488,22 +3905,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
- .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -3516,11 +3932,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -3541,7 +3958,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+ BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
@@ -3573,7 +3990,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
NeedPred = true; NeedCC = true; NeedOp3 = true;
break;
}
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
if (OpOpc == ARM::tAND)
AddDefaultT1CC(MIB);
MIB.addReg(ARM::SP);
@@ -3589,10 +4006,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(BB, dl, TII->get(CopyOpc))
+ BuildMI(*BB, MI, dl, TII->get(CopyOpc))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(ARM::SP);
- MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
}
@@ -3893,7 +4310,8 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
// Narrowing shifts require an immediate right shift.
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
break;
- llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
default:
llvm_unreachable("unhandled vector shift");
@@ -4156,14 +4574,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
if (!Subtarget->hasV6Ops())
// Pre-v6 does not support unaligned mem access.
return false;
- else {
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- // FIXME: This is pretty conservative. Should we provide cmdline option to
- // control the behaviour?
- if (!Subtarget->isTargetDarwin())
- return false;
- }
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ // FIXME: This is pretty conservative. Should we provide cmdline option to
+ // control the behaviour?
+ if (!Subtarget->isTargetDarwin())
+ return false;
switch (VT.getSimpleVT().SimpleTy) {
default:
@@ -4619,7 +5036,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(0U, ARM::CCRRegisterClass);
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
@@ -4669,7 +5086,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
@@ -4818,8 +5234,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
- Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
bool
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9c7517c88195..3a3866928a0e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -70,6 +70,8 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ TC_RETURN, // Tail call return pseudo.
+
THREAD_POINTER,
DYN_ALLOC, // Dynamic allocation on the stack.
@@ -133,6 +135,13 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
// Floating-point max and min:
FMAX,
FMIN
@@ -141,11 +150,12 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace ARM {
- /// getVMOVImm - If this is a build_vector of constants which can be
- /// formed by using a VMOV instruction of the specified element size,
- /// return the constant being splatted. The ByteSize field indicates the
- /// number of bytes of each element [1248].
- SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ /// getNEONModImm - If this is a valid vector constant for a NEON
+ /// instruction with a "modified immediate" operand (e.g., VMOV) of the
+ /// specified element size, return the encoded value for that immediate.
+ /// The ByteSize field indicates the number of bytes of each element [1248].
+ SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG);
/// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
/// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
@@ -189,9 +199,9 @@ namespace llvm {
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
- /// icmp immediate, that is the target has icmp instructions which can compare
- /// a register against the immediate without having to materialize the
- /// immediate into a register.
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
virtual bool isLegalICmpImmediate(int64_t Imm) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
@@ -232,7 +242,6 @@ namespace llvm {
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -282,7 +291,8 @@ namespace llvm {
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const;
- CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const;
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
@@ -303,6 +313,7 @@ namespace llvm {
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -327,18 +338,34 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index d487df16df5e..ac568e75ccc4 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -50,27 +50,23 @@ def VFPLdStMulFrm : Format<22>;
def VFPMiscFrm : Format<23>;
def ThumbFrm : Format<24>;
-
-def NEONFrm : Format<25>;
-def NEONGetLnFrm : Format<26>;
-def NEONSetLnFrm : Format<27>;
-def NEONDupFrm : Format<28>;
-
-def MiscFrm : Format<29>;
-def ThumbMiscFrm : Format<30>;
-
-def NLdStFrm : Format<31>;
-def N1RegModImmFrm : Format<32>;
-def N2RegFrm : Format<33>;
-def NVCVTFrm : Format<34>;
-def NVDupLnFrm : Format<35>;
-def N2RegVShLFrm : Format<36>;
-def N2RegVShRFrm : Format<37>;
-def N3RegFrm : Format<38>;
-def N3RegVShFrm : Format<39>;
-def NVExtFrm : Format<40>;
-def NVMulSLFrm : Format<41>;
-def NVTBLFrm : Format<42>;
+def MiscFrm : Format<25>;
+
+def NGetLnFrm : Format<26>;
+def NSetLnFrm : Format<27>;
+def NDupFrm : Format<28>;
+def NLdStFrm : Format<29>;
+def N1RegModImmFrm: Format<30>;
+def N2RegFrm : Format<31>;
+def NVCVTFrm : Format<32>;
+def NVDupLnFrm : Format<33>;
+def N2RegVShLFrm : Format<34>;
+def N2RegVShRFrm : Format<35>;
+def N3RegFrm : Format<36>;
+def N3RegVShFrm : Format<37>;
+def NVExtFrm : Format<38>;
+def NVMulSLFrm : Format<39>;
+def NVTBLFrm : Format<40>;
// Misc flags.
@@ -1653,17 +1649,17 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
opc, dt, asm, pattern>;
class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
opc, dt, asm, pattern>;
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
opc, dt, asm, pattern>;
// Vector Duplicate Lane (from scalar to all elements)
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 85f6b4019a8e..ba228ffac8ed 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -63,7 +63,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
void ARMInstrInfo::
reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
DebugLoc dl = Orig->getDebugLoc();
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index d4199d1267fd..4563ffea7b9c 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,7 +35,7 @@ public:
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index f3156d94693b..c73e204a26b3 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -53,6 +53,8 @@ def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -117,6 +119,9 @@ def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -858,13 +863,13 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
Pseudo, IIC_iALUi,
"adr$p\t$dst, #$label", []>;
+} // neverHasSideEffects
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
Pseudo, IIC_iALUi,
"adr$p\t$dst, #${label}_${id}", []> {
let Inst{25} = 1;
}
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -1026,6 +1031,74 @@ let isCall = 1,
}
}
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]>;
+
+ def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]>;
+
+ def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
+ }
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsThumb, IsNotDarwin]>;
+
+ def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsNotDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
+ }
+ }
+}
+
let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
@@ -1397,6 +1470,14 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
let Inst{25} = 0;
}
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm,
+ IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+}
+
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
@@ -2530,31 +2611,30 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ] in {
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
- "add\t$val, pc, #8\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
- "add\t$val, pc, #8\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
@@ -2621,6 +2701,24 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// TODO: add,sub,and, 3-instr forms?
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
// Direct calls
def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
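The ARMPat entries above map the new ARMtcret node onto one of four TC_RETURN pseudos, chosen by target (Darwin vs. not, since the Darwin forms also clobber R9) and by whether the destination is a register or a symbol. A minimal standalone sketch of that selection, written as ordinary C++; the helper name and enum are hypothetical stand-ins for the pseudo opcodes, not LLVM API:

    #include <cassert>

    // Register-indirect tail calls use the TCRETURNri* pseudos, direct
    // (symbol) tail calls use TCRETURNdi*; the *ND variants are the
    // non-Darwin forms, which leave R9 alone.
    enum TCReturnOpc { TCRETURNri, TCRETURNdi, TCRETURNriND, TCRETURNdiND };

    TCReturnOpc selectTCReturn(bool IsDarwin, bool DestIsReg) {
      if (IsDarwin)
        return DestIsReg ? TCRETURNri : TCRETURNdi;
      return DestIsReg ? TCRETURNriND : TCRETURNdiND;
    }

    int main() {
      assert(selectTCReturn(true, true) == TCRETURNri);
      assert(selectTCReturn(false, false) == TCRETURNdiND);
      return 0;
    }
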
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 197ec16eedea..a84315f73038 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -98,17 +98,8 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
// NEON operand definitions
//===----------------------------------------------------------------------===//
-def h8imm : Operand<i8> {
- let PrintMethod = "printHex8ImmOperand";
-}
-def h16imm : Operand<i16> {
- let PrintMethod = "printHex16ImmOperand";
-}
-def h32imm : Operand<i32> {
- let PrintMethod = "printHex32ImmOperand";
-}
-def h64imm : Operand<i64> {
- let PrintMethod = "printHex64ImmOperand";
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
}
//===----------------------------------------------------------------------===//
@@ -812,11 +803,6 @@ def DSubReg_f64_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;
-def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()),
- MVT::i32);
-}]>;
// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
@@ -2282,7 +2268,7 @@ def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
// For disassembly only.
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$dst, $src, #0">;
+ "$dst, $src, #0">;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -2834,73 +2820,70 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 1, *CurDAG);
+ return ARM::getNEONModImm(N, 1, true, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;
// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 2, *CurDAG);
+ return ARM::getNEONModImm(N, 2, true, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;
// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 4, *CurDAG);
+ return ARM::getNEONModImm(N, 4, true, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;
// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 8, *CurDAG);
+ return ARM::getNEONModImm(N, 8, true, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;
-// Note: Some of the cmode bits in the following VMOV instructions need to
-// be encoded based on the immed values.
-
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins h8imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins h8imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
-def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
- (ins h16imm:$SIMM), IIC_VMOVImm,
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
- (ins h16imm:$SIMM), IIC_VMOVImm,
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
-def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
- (ins h32imm:$SIMM), IIC_VMOVImm,
+def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
- (ins h32imm:$SIMM), IIC_VMOVImm,
+def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins h64imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins h64imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
} // isReMaterializable
@@ -3122,17 +3105,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
-def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
- (INSERT_SUBREG QPR:$src,
- (i64 (EXTRACT_SUBREG QPR:$src,
- (DSubReg_f64_reg imm:$lane))),
- (DSubReg_f64_other_reg imm:$lane))>;
-def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
- (INSERT_SUBREG QPR:$src,
- (f64 (EXTRACT_SUBREG QPR:$src,
- (DSubReg_f64_reg imm:$lane))),
- (DSubReg_f64_other_reg imm:$lane))>;
-
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
"vmovn", "i", int_arm_neon_vmovn>;
@@ -3319,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
- DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
- DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
- DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
} // hasExtraSrcRegAllocReq = 1
// VTBX : Vector Table Extension
@@ -3348,23 +3314,18 @@ let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
- DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+ "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+ "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
+ "$orig = $dst", []>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
- "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+ "$orig = $dst", []>;
} // hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
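In this file the per-width hex-immediate operands (h8imm/h16imm/h32imm/h64imm) collapse into a single nModImm operand, and getVMOVImm becomes getNEONModImm with an explicit element size and a VMOV/VMVN flag. As a rough, standalone illustration of the simplest case only (this is not LLVM's encoder, which handles many more cmode forms), a VMOV.i8 modified immediate requires every byte of the 64-bit element to be the same 8-bit value:

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Sketch: can a 64-bit splatted element be encoded as a VMOV.i8 immediate?
    std::optional<uint8_t> encodeAsVMOVi8(uint64_t Elt) {
      uint8_t Byte = Elt & 0xFF;
      for (int i = 1; i < 8; ++i)
        if (((Elt >> (8 * i)) & 0xFF) != Byte)
          return std::nullopt;
      return Byte;
    }

    int main() {
      assert(encodeAsVMOVi8(0xABABABABABABABABULL).value() == 0xAB);
      assert(!encodeAsVMOVi8(0x00FF00FF00FF00FFULL).has_value());
      return 0;
    }
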
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 40f924b679fa..bc0790dccbb5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -894,11 +894,11 @@ def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p\t$dst, #$label", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+} // neverHasSideEffects
def tLEApcrelJT : T1I<(outs tGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -923,18 +923,18 @@ let isCall = 1,
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved resgisters, which is exactly what we want.
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
+ isBarrier = 1 in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
@@ -1037,7 +1037,8 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
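The rewritten tInt_eh_sjlj_setjmp sequence no longer stores SP itself; it only computes and stores the resume address, and the "adds $val, #7" constant follows from Thumb PC-read semantics. A worked sketch of that arithmetic, assuming all five instructions in the sequence use 2-byte Thumb1 encodings (the layout below is illustrative, not taken from the sources):

    #include <cassert>
    #include <cstdint>

    // In Thumb state, reading PC yields the current instruction's address + 4.
    // Assumed layout (2 bytes each):
    //   A+0: mov val, pc   A+2: adds val, #7   A+4: str val, [src, #4]
    //   A+6: movs r0, #0   A+8: b 1f           A+10: movs r0, #1   (1: at A+12)
    uint32_t setjmpResumeAddr(uint32_t MovPcAddr) {
      uint32_t PcRead = MovPcAddr + 4;  // value produced by "mov val, pc"
      return PcRead + 7;                // "adds val, #7": points at the
                                        // "movs r0, #1" at A+10, with bit 0
                                        // set to stay in Thumb state
    }

    int main() {
      assert(setjmpResumeAddr(0x1000) == 0x100B); // longjmp resumes at 0x100A|1
      return 0;
    }
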
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index b91c089fa5db..4692f2a42133 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -637,8 +637,7 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode GPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -649,8 +648,7 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -661,8 +659,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// SXTB16 and UXTB16 do not need the .w qualifier.
-multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>,
@@ -689,9 +687,9 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
@@ -787,6 +785,7 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
+} // neverHasSideEffects
def t2LEApcrelJT : T2XI<(outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
"adr$p.w\t$dst, #${label}_${id}", []> {
@@ -798,7 +797,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
-} // neverHasSideEffects
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -1330,7 +1328,7 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">;
+defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">;
defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
@@ -1347,13 +1345,13 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16",
+defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>;
+ (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>;
+ (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -2389,37 +2387,36 @@ let isCall = 1,
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved resgisters, which is exactly what we want.
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ] in {
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
@@ -2529,6 +2526,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// IT block
+let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, Size2Bytes, IIC_iALUx,
"it$mask\t$cc", "", []> {
@@ -2691,7 +2689,8 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
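The extend multiclasses touched above pair each instruction with a plain dag fragment (sext_inreg or an and-mask), and the HasT2ExtractPack guard moves from the generic multiclass onto the UXTB16/SXTB16 definitions and the two rotate patterns. For reference, the scalar meaning of those UnOpFrags, restated as ordinary C++ (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Scalar equivalents of the UnOpFrag patterns in the hunks above:
    int32_t  sxtb(int32_t x)    { return (int32_t)(int8_t)x;  } // sext_inreg i8
    int32_t  sxth(int32_t x)    { return (int32_t)(int16_t)x; } // sext_inreg i16
    uint32_t uxtb(uint32_t x)   { return x & 0x000000FF; }
    uint32_t uxth(uint32_t x)   { return x & 0x0000FFFF; }
    uint32_t uxtb16(uint32_t x) { return x & 0x00FF00FF; }      // two byte lanes

    int main() {
      assert(sxtb(0xFF) == -1);
      assert(uxtb16(0x12345678u) == 0x00340078u);
      return 0;
    }
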
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 54474cfe2e29..84c23e1a784c 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -255,25 +255,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
-def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index ff332b7ee15b..f5d9effeb9a5 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -143,7 +143,8 @@ namespace llvm {
JumpTableId2AddrMap[JTI] = Addr;
}
- /// getPCLabelAddr - Retrieve the address of the PC label of the specified id.
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
intptr_t getPCLabelAddr(unsigned Id) const {
DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
assert(I != PCLabelMap.end());
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 8585c1e50105..f80e316d23e8 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -74,11 +74,14 @@ namespace {
private:
struct MemOpQueueEntry {
int Offset;
+ unsigned Reg;
+ bool isKill;
unsigned Position;
MachineBasicBlock::iterator MBBI;
bool Merged;
- MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
- : Offset(o), Position(p), MBBI(i), Merged(false) {}
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
@@ -128,30 +131,30 @@ namespace {
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::LDM;
case ARM::STR:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::STM;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::t2LDM;
case ARM::t2STRi8:
case ARM::t2STRi12:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::t2STM;
case ARM::VLDRS:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMS;
case ARM::VSTRS:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMS;
case ARM::VLDRD:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMD;
case ARM::VSTRD:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
@@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- int Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
- SmallVector<std::pair<unsigned, bool>, 8> Regs;
const unsigned insertPos = memOps[insertAfter].Position;
+
+ SmallSet<unsigned, 4> UnavailRegs;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0; i < memOpsBegin; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ if (memOps[i].Merged)
+ UnavailRegs.insert(Reg);
+ else {
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+ }
+ for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- bool isKill = MO.isKill();
+ unsigned Reg = memOps[i].Reg;
+ if (UnavailRegs.count(Reg))
+ // Register is killed before and it's not easy / possible to update the
+ // kill marker on already merged instructions. Abort.
+ return;
// If we are inserting the merged operation after an unmerged operation that
// uses the same register, make sure to transfer any kill flag.
- for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
- if (memOps[j].Position<insertPos) {
- const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
- if (MOJ.getReg() == Reg && MOJ.isKill())
- isKill = true;
- }
-
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- Loc++;
+ ++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
Pred, PredReg, Scratch, dl, Regs))
return;
@@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB,
Merges.push_back(prior(Loc));
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any unmerged memops that come before insertPos.
- if (Regs[i-memOpsBegin].second)
- for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
- if (memOps[j].Position<insertPos) {
- MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
- if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
- MOJ.setIsKill(false);
- }
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ memOps[j].MBBI->getOperand(0).setIsKill(false);
+ }
+ }
MBB.erase(memOps[i].MBBI);
memOps[i].Merged = true;
}
@@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
// Try merging with the previous instruction.
- if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isAM4) {
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
// Try merging with the next instruction.
- if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (isAM4) {
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Try merging with the previous instruction.
- if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
@@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
}
// Try merging with the next instruction.
- if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
@@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
- if (MI->hasOneMemOperand()) {
- const MachineMemOperand *MMO = *MI->memoperands_begin();
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
- // Don't touch volatile memory accesses - we may be changing their order.
- if (MMO->isVolatile())
- return false;
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
- // not.
- if (MMO->getAlignment() < 4)
- return false;
- }
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
// str <undef> could probably be eliminated entirely, but for now we just want
// to avoid making a mess of it.
@@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
return false;
+ MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
@@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
+ NewBBI = llvm::prior(MBBI);
} else {
// Split into two instructions.
assert((!isT2 || !OffReg) &&
@@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
- // If the two source operands are the same, the kill marker is probably
- // on the first one. e.g.
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
// t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
EvenDeadKill = false;
OddDeadKill = true;
@@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
++NumSTRD2STR;
}
- MBBI = prior(MBBI);
MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
}
return false;
}
@@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (isMemOp) {
int Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
@@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
if (Clobber) {
@@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
I != E; ++I) {
if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
break;
} else if (Offset == I->Offset) {
@@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
- if (Advance) {
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
++Position;
++MBBI;
if (MBBI == E)
@@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
// some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
- if (MemOps.count(&*I))
+ if (I->isDebugValue() || MemOps.count(&*I))
continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
@@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
std::sort(Ops.begin(), Ops.end(), OffsetCompare());
// The loads / stores of the same base are in order. Scan them from first to
- // last and check for the followins:
+ // last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
while (Ops.size() > 1) {
@@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end() && MemOps.count(InsertPos))
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
++InsertPos;
// If we are moving a pair of loads / stores, see if it makes sense
@@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
break;
}
- MI2LocMap[MI] = Loc++;
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
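MergeOpsUpdate now records the register and kill flag in each MemOpQueueEntry and uses side tables (KilledRegs and Killer, plus an UnavailRegs abort path for killers that were already merged) to transfer kill markers onto the merged LDM/STM and strip them from the earlier, unmerged memops. A simplified standalone sketch of that bookkeeping, with plain containers standing in for the LLVM types:

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    // Simplified stand-in for the pass's queue entry.
    struct MemOp { unsigned Reg; bool IsKill; unsigned Position; bool Merged; };

    // Returns false if the merge must be abandoned (a killer was already
    // merged, so its kill marker can no longer be updated). Otherwise fills
    // Regs with (register, kill-flag) pairs for the merged LDM/STM and
    // KillerIdx with the earlier memops whose kill flags must be cleared.
    bool planKillFlags(const std::vector<MemOp> &MemOps,
                       unsigned Begin, unsigned End, unsigned InsertPos,
                       std::vector<std::pair<unsigned, bool>> &Regs,
                       std::map<unsigned, unsigned> &KillerIdx) {
      std::set<unsigned> UnavailRegs, KilledRegs;
      for (unsigned i = 0; i != MemOps.size(); ++i) {
        if (i >= Begin && i < End) continue;          // the ops being merged
        if (MemOps[i].Position >= InsertPos || !MemOps[i].IsKill) continue;
        if (i < Begin && MemOps[i].Merged)
          UnavailRegs.insert(MemOps[i].Reg);          // can't fix this kill up
        else {
          KilledRegs.insert(MemOps[i].Reg);
          KillerIdx[MemOps[i].Reg] = i;
        }
      }
      for (unsigned i = Begin; i < End; ++i) {
        unsigned Reg = MemOps[i].Reg;
        if (UnavailRegs.count(Reg)) return false;     // abort the merge
        Regs.emplace_back(Reg, MemOps[i].IsKill || KilledRegs.count(Reg));
      }
      return true;
    }
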
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 013427635592..7e57a1ca5576 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -97,7 +100,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -108,7 +112,8 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -229,6 +234,9 @@ public:
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 6beca8b9199b..d020f3c74bde 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -153,11 +153,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Pseudo 256-bit registers to represent pairs of Q registers. These should
// never be present in the emitted code.
-// These are used for NEON load / store instructions, e.g. vld4, vst3.
-// NOTE: It's possible to define more QQ registers since technical the
-// starting D register number doesn't have to be multiple of 4. e.g.
-// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register
-// stuffs very messy.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
let SubRegIndices = [qsub_0, qsub_1] in {
let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
(ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
@@ -183,7 +183,8 @@ let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
(ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1),
(ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
(ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
- (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in {
+ (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in
+{
def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
}
@@ -196,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
}
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-
-def FPSCR : ARMReg<1, "fpscr">;
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
// Register classes.
//
@@ -348,6 +349,73 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
}];
}
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // R9 is available.
+ static const unsigned ARM_GPR_R9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R9, ARM::R12 };
+ // R9 is not available.
+ static const unsigned ARM_GPR_NOR9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_GPR_AO_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC;
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_NOR9_TC;
+ else
+ return ARM_GPR_R9_TC;
+ } else
+ // R9 is either callee-saved or reserved; can't use it.
+ return ARM_GPR_NOR9_TC;
+ }
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+
+ if (Subtarget.isThumb1Only()) {
+ I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+ return I;
+ }
+
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.isR9Reserved())
+ I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ else
+ I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
+ } else
+ // R9 is either callee-saved or reserved; can't use it.
+ I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ return I;
+ }
+ }];
+}
+
+
// Scalar single precision floating point register class..
def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
@@ -479,4 +547,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index bbfc0b2b4744..282abca98803 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1,10 +1,10 @@
//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM Cortex A8 processors.
@@ -32,50 +32,50 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
//
// Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
// Unary Instructions that produce a result
- InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
//
// Move instructions, conditional
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
// Integer multiply pipeline
// Result written in E5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
//
InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
-
+
// Integer load pipeline
//
// loads have an extra cycle of latency, but are fully pipelined
@@ -166,7 +166,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>]>,
-
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
@@ -276,14 +276,14 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// Single-precision FP Load
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Load
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -292,7 +292,7 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// FP Load Multiple
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -301,14 +301,14 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// Single-precision FP Store
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Store
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -317,7 +317,7 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// FP Store Multiple
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -329,35 +329,35 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// VLD1
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// VLD2
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
//
// VLD3
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
//
// VLD4
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
@@ -600,7 +600,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -610,9 +610,9 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 75320d929952..df2f896a8d4b 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1,10 +1,10 @@
//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM Cortex A9 processors.
@@ -16,7 +16,6 @@
// Reference Manual".
//
// Functional units
-def A9_Issue : FuncUnit; // issue
def A9_Pipe0 : FuncUnit; // pipeline 0
def A9_Pipe1 : FuncUnit; // pipeline 1
def A9_LSPipe : FuncUnit; // LS pipe
@@ -27,7 +26,121 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side
// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
//
def CortexA9Itineraries : ProcessorItineraries<
- [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+ [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
+ // Two fully-pipelined integer ALU pipelines
+ // FIXME: There are no operand latencies for these instructions at all!
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
+ InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
+ InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple
+ InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStorer , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>]>,
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+
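+
+ Editorial note on reading the new entries above (a sketch, not part of the patch): each
+ InstrItinData pairs a list of pipeline stages with a list of per-operand cycle counts, so
+ for IIC_iLoadi the list [3, 1] says the loaded result is available at cycle 3 while the
+ base register is read in cycle 1. A minimal C++ sketch of that interpretation, with
+ illustrative names only:
+
+     #include <vector>
+
+     // Illustrative only: mirrors how an itinerary entry's two lists are read.
+     struct StageSketch {
+       unsigned Cycles;      // how long the listed functional units are held
+       unsigned NextCycles;  // 0 means the next stage starts in the same cycle
+     };
+
+     struct ItinEntrySketch {
+       std::vector<StageSketch> Stages;
+       std::vector<unsigned> OperandCycles; // per MI operand, in operand order:
+                                            // cycle a def is ready / a use is read
+     };
+
+     // e.g. IIC_iLoadi above: result def ready at cycle 3, base use read at cycle 1.
+     ItinEntrySketch LoadImm = { { {1, 1}, {1, 1} }, {3, 1} };
+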
// VFP and NEON share the same register file. This means that every VFP
// instruction should wait for full completion of the consecutive NEON
// instruction and vice-versa. We model this behavior with two artificial FUs:
@@ -39,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// register file writeback!).
// Every NEON instruction does the same but with FUs swapped.
//
- // Since the reserved FU cannot be acquired this models precisly "cross-domain"
- // stalls.
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
// VFP
// Issue through integer pipeline, and execute in NEON unit.
@@ -48,21 +161,21 @@ def CortexA9Itineraries : ProcessorItineraries<
// FP Special Register to Integer Register File Move
InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Unary
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
@@ -70,124 +183,124 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Single to Half FP Convert
InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Half to Single FP Convert
InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<6, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<7, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<9, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<10, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<16, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<26, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<18, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<13, [A9_NPipe]>], [17, 1]>,
+ InstrStage<1, [A9_Pipe1]>,
+ InstrStage<13, [A9_NPipe]>], [17, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<33, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<28, [A9_NPipe]>], [32, 1]>,
//
@@ -195,92 +308,79 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
//
// Single-precision FP Load
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-precision FP Load
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// FP Load Multiple
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Store
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-precision FP Store
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// FP Store Multiple
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
// NEON
// Issue through integer pipeline, and execute in NEON unit.
- // FIXME: Neon pipeline and LdSt unit are multiplexed.
+ // FIXME: Neon pipeline and LdSt unit are multiplexed.
// Add some syntactic sugar to model this!
// VLD1
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// VLD2
@@ -288,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
//
// VLD3
@@ -298,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
//
// VLD4
@@ -308,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
@@ -318,121 +415,120 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-register Integer Unary
InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2]>,
//
// Quad-register Integer Unary
InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2]>,
//
// Double-register Integer Q-Unary
InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Quad-register Integer Q-Unary
InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double-register Integer Binary
InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Binary
InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Double-register Integer Subtract
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
//
// Quad-register Integer Subtract
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
//
// Double-register Integer Shift
InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
//
// Quad-register Integer Shift
InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Double-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
//
// Quad-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
//
// Double-register Integer Subtract (4 cycle)
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Subtract (4 cycle)
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
//
@@ -440,7 +536,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Count
@@ -449,35 +545,35 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
//
// Double-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Double-register Integer Pair Add Long
InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
//
// Quad-register Integer Pair Add Long
InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
//
@@ -485,14 +581,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
//
// Quad-register Integer Multiply (.8, .16)
InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
//
@@ -500,56 +596,56 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
//
// Quad-register Integer Multiply (.32)
InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
//
// Double-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
//
// Double-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
//
// Quad-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
//
// Quad-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
//
// Move Immediate
InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3]>,
//
// Double-register Permute Move
InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>], [2, 1]>,
//
// Quad-register Permute Move
@@ -558,42 +654,42 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1]>,
//
// Integer to Single-precision Move
InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
//
// Integer to Lane Move
InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
//
@@ -601,7 +697,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 2]>,
//
// Quad-register FP Unary
@@ -610,7 +706,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2]>,
//
// Double-register FP Binary
@@ -619,7 +715,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
//
// Quad-register FP Binary
@@ -630,14 +726,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
//
// Double-register FP Multiply-Accumulate
InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register FP Multiply-Accumulate
@@ -646,28 +742,28 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
//
// Double-register Reciprocal Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
//
// Quad-register Reciprocal Step
InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
//
// Double-register Permute
InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
//
// Quad-register Permute
@@ -676,7 +772,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
//
// Quad-register Permute (3 cycle issue)
@@ -685,7 +781,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
//
@@ -693,57 +789,57 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
//
// Quad-register VEXT
InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
//
// VTB
InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index f813022d8741..08b560cc0c2f 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -16,7 +16,7 @@
// Functional Units
def V6_Pipe : FuncUnit; // pipeline
-// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
//
def ARMV6Itineraries : ProcessorItineraries<
[V6_Pipe], [
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b4a9252909bc..09203f9304df 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -60,8 +60,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
- std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
+ std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ "v128:32:128-v64:32:64-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
}
@@ -74,9 +76,11 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32")),
TLInfo(*this),
TSInfo(*this) {
}
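
For reference (editorial note, not part of the change): the pieces added to these target
data strings follow LLVM's size:abi-align:preferred-align convention, all in bits, so the
AAPCS variant reads roughly as below.

    // Annotated copy of the AAPCS layout string built above.
    static const char *AAPCSLayoutSketch =
        "e-"            // little-endian
        "p:32:32-"      // 32-bit pointers, 32-bit aligned
        "f64:64:64-"    // double: 64-bit ABI and preferred alignment
        "i64:64:64-"    // i64: 64-bit alignment
        "v128:64:128-"  // 128-bit vectors: 64-bit ABI, 128-bit preferred alignment
        "v64:64:64-"    // 64-bit vectors: 64-bit alignment
        "n32";          // native integer width is 32 bits
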
@@ -98,6 +102,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
+
return true;
}
@@ -115,21 +120,20 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- return true;
-}
-
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
}
-
- if (Subtarget.isThumb2()) {
+ if (Subtarget.isThumb2())
PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2())
PM.add(createThumb2SizeReductionPass());
- }
PM.add(createARMConstantIslandPass());
return true;
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bfa89c4a4696..8415d1ad8827 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -425,7 +425,7 @@ bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
const AsmToken &NextTok = Parser.getTok();
if (NextTok.isNot(AsmToken::EndOfStatement)) {
if (NextTok.isNot(AsmToken::Comma))
- return Error(NextTok.getLoc(), "',' expected");
+ return Error(NextTok.getLoc(), "',' expected");
Parser.Lex(); // Eat comma token.
if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
@@ -488,7 +488,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
const AsmToken &Tok = Parser.getTok();
if (ParseShift(ShiftType, ShiftAmount, E))
- return Error(Tok.getLoc(), "shift expected");
+ return Error(Tok.getLoc(), "shift expected");
OffsetRegShifted = true;
}
}
@@ -665,7 +665,6 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.push_back(Op.take());
- SMLoc Loc = Parser.getTok().getLoc();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
@@ -763,15 +762,10 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
if (Tok.isNot(AsmToken::Identifier))
return Error(L, "unexpected token in .syntax directive");
const StringRef &Mode = Tok.getString();
- bool unified_syntax;
- if (Mode == "unified" || Mode == "UNIFIED") {
+ if (Mode == "unified" || Mode == "UNIFIED")
Parser.Lex();
- unified_syntax = true;
- }
- else if (Mode == "divided" || Mode == "DIVIDED") {
+ else if (Mode == "divided" || Mode == "DIVIDED")
Parser.Lex();
- unified_syntax = false;
- }
else
return Error(L, "unrecognized syntax mode in .syntax directive");
@@ -791,15 +785,10 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
if (Tok.isNot(AsmToken::Integer))
return Error(L, "unexpected token in .code directive");
int64_t Val = Parser.getTok().getIntVal();
- bool thumb_mode;
- if (Val == 16) {
+ if (Val == 16)
Parser.Lex();
- thumb_mode = true;
- }
- else if (Val == 32) {
+ else if (Val == 32)
Parser.Lex();
- thumb_mode = false;
- }
else
return Error(L, "invalid operand to .code directive");
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index d95efdb80943..6a40cf3602e9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -175,23 +175,8 @@ namespace {
raw_ostream &O);
void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
-
- void printHex8ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
- }
- void printHex16ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
- }
- void printHex32ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
- }
- void printHex64ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
- }
+ void printNEONModImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
@@ -322,7 +307,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
O << '{'
- << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
+ << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi)
<< '}';
} else if (Modifier && strcmp(Modifier, "lane") == 0) {
unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
@@ -617,8 +602,12 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO2.getImm()) {
+ unsigned Align = MO2.getImm();
+ assert((Align == 8 || Align == 16 || Align == 32) &&
+ "unexpected NEON load/store alignment");
+ Align <<= 3;
// FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << MO2.getImm();
+ O << ", :" << Align;
}
O << "]";
}
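
Editorial note on the hunk above: the addrmode6 immediate carries the alignment in bytes
(8, 16 or 32), while the assembly syntax after the colon expects bits, hence the shift by
3. A trivial sketch of the mapping:

    // 8 bytes -> ":64", 16 bytes -> ":128", 32 bytes -> ":256" (sketch only).
    unsigned alignSuffixBits(unsigned AlignBytes) {
      return AlignBytes << 3;   // bytes to bits
    }
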
@@ -1039,6 +1028,14 @@ void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
}
}
+void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
+
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
@@ -1064,20 +1061,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printOperand(MI, OpNum, O);
return false;
case 'Q':
- if (TM.getTargetData()->isLittleEndian())
- break;
- // Fallthrough
case 'R':
- if (TM.getTargetData()->isBigEndian())
- break;
- // Fallthrough
- case 'H': // Write second word of DI / DF reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNum).isReg() ||
- OpNum+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNum+1).isReg())
- return true;
- ++OpNum; // Return the high-part.
+ case 'H':
+ report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!");
+ return true;
}
}
@@ -1384,11 +1371,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
} else if (MO.isGlobal()) {
MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
const MCSymbolRefExpr *SymRef1 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_LO16, OutContext);
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_LO16, OutContext);
const MCSymbolRefExpr *SymRef2 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_HI16, OutContext);
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_HI16, OutContext);
V1 = MCOperand::CreateExpr(SymRef1);
V2 = MCOperand::CreateExpr(SymRef2);
} else {
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 2b94b76087e7..170819ad4f06 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -779,22 +779,10 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
O << '#' << MI->getOperand(OpNum).getImm();
}
-void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
-}
-
-void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
-}
-
-void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
-}
-
-void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index be0b7c1eb027..ddf5047793d2 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -104,10 +104,7 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
// FIXME: Implement.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 29e66e13b168..0df34666b959 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen
NEONPreAllocPass.cpp
Thumb1InstrInfo.cpp
Thumb1RegisterInfo.cpp
+ Thumb2HazardRecognizer.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index adb7795a746c..a07ff2832aa7 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -34,7 +34,7 @@
/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
/// defined with two components:
///
-/// def pred { // Operand PredicateOperand
+/// def pred { // Operand PredicateOperand
/// ValueType Type = OtherVT;
/// string PrintMethod = "printPredicateOperand";
/// string AsmOperandLowerMethod = ?;
@@ -54,7 +54,7 @@
///
/// For the Defs part, in the simple case of only cc_out:$s, we have:
///
-/// def cc_out { // Operand OptionalDefOperand
+/// def cc_out { // Operand OptionalDefOperand
/// ValueType Type = OtherVT;
/// string PrintMethod = "printSBitModifierOperand";
/// string AsmOperandLowerMethod = ?;
@@ -765,7 +765,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|| Opcode == ARM::SMC || Opcode == ARM::SVC) &&
"Unexpected Opcode");
- assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected");
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected");
int Imm32 = 0;
if (Opcode == ARM::SMC) {
@@ -1106,7 +1106,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
(OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+2].RegClass == 0) &&
+ (OpInfo[OpIdx+2].RegClass < 0) &&
"Expect 3 reg operands");
// Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
@@ -1201,7 +1201,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == 0) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -1323,7 +1323,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == 0) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -1494,7 +1494,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// If there is still an operand info left which is an immediate operand, add
// an additional imm5 LSL/ASR operand.
- if (ThreeReg && OpInfo[OpIdx].RegClass == 0
+ if (ThreeReg && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
@@ -1540,7 +1540,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// If there is still an operand info left which is an immediate operand, add
// an additional rotate immediate operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 2-bit rotate field Inst{11-10}.
unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
@@ -1725,7 +1725,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
"Tied to operand expected");
MI.addOperand(MI.getOperand(0));
- assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() &&
+ assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef() && "Imm operand expected");
MI.addOperand(MCOperand::CreateImm(fbits));
@@ -1984,7 +1984,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
// Extract/decode the f64/f32 immediate.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// The asm syntax specifies the before-expanded <imm>.
// Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
@@ -2077,42 +2077,12 @@ static unsigned decodeLaneIndex(uint32_t insn) {
// imm3 = Inst{18-16}, imm4 = Inst{3-0}
// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
+ unsigned char op = (insn >> 5) & 1;
unsigned char cmode = (insn >> 8) & 0xF;
unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
((insn >> 16) & 7) << 4 |
(insn & 0xF);
- uint64_t Imm64 = 0;
-
- switch (esize) {
- case ESize8:
- Imm64 = Imm8;
- break;
- case ESize16:
- Imm64 = Imm8 << 8*(cmode >> 1 & 1);
- break;
- case ESize32: {
- if (cmode == 12)
- Imm64 = (Imm8 << 8) | 0xFF;
- else if (cmode == 13)
- Imm64 = (Imm8 << 16) | 0xFFFF;
- else {
- // Imm8 to be shifted left by how many bytes...
- Imm64 = Imm8 << 8*(cmode >> 1 & 3);
- }
- break;
- }
- case ESize64: {
- for (unsigned i = 0; i < 8; ++i)
- if ((Imm8 >> i) & 1)
- Imm64 |= (uint64_t)0xFF << 8*i;
- break;
- }
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-
- return Imm64;
+ return (op << 12) | (cmode << 8) | Imm8;
}
// A8.6.339 VMUL, VMULL (by scalar)
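
With this change the disassembler simply repacks op, cmode and imm8 into one encoded
operand and leaves the expansion to ARM_AM::decodeNEONModImm, added elsewhere in this
patch. For reference, a sketch of the expansion the deleted switch performed, keyed on
element size exactly as the old code was; the helper name is illustrative and this is not
the decodeNEONModImm signature:

    #include <cassert>
    #include <cstdint>

    // Mirrors the removed switch above; illustrative only.
    static uint64_t expandModImmSketch(unsigned Imm8, unsigned CMode,
                                       unsigned EltBits) {
      switch (EltBits) {
      case 8:  return Imm8;
      case 16: return (uint64_t)Imm8 << (8 * ((CMode >> 1) & 1));
      case 32:
        if (CMode == 12) return ((uint64_t)Imm8 << 8)  | 0xFF;   // "shift-ones" forms
        if (CMode == 13) return ((uint64_t)Imm8 << 16) | 0xFFFF;
        return (uint64_t)Imm8 << (8 * ((CMode >> 1) & 3));
      case 64: {
        uint64_t V = 0;                  // each Imm8 bit selects a byte of ones
        for (unsigned i = 0; i != 8; ++i)
          if ((Imm8 >> i) & 1)
            V |= (uint64_t)0xFF << (8 * i);
        return V;
      }
      }
      assert(0 && "unexpected element size");
      return 0;
    }
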
@@ -2303,7 +2273,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
@@ -2320,7 +2290,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
RegClass = OpInfo[OpIdx].RegClass;
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, RegClass, Rd,
UseDRegPair(Opcode))));
@@ -2329,7 +2299,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
++OpIdx;
@@ -2340,7 +2310,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// possible TIED_TO DPR/QPR's (ignored), then possible lane index.
RegClass = OpInfo[0].RegClass;
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, RegClass, Rd,
UseDRegPair(Opcode))));
@@ -2355,7 +2325,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
@@ -2366,7 +2336,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
}
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 &&
"Tied to operand expected");
MI.addOperand(MCOperand::CreateReg(0));
@@ -2374,7 +2344,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
++OpIdx;
@@ -2438,7 +2408,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
assert(NumOps >= 2 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == 0) &&
+ (OpInfo[1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
// Qd/Dd = Inst{22:15-12} => NEON Rd
@@ -2552,7 +2522,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
}
// Add the imm operand, if required.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
unsigned imm = 0xFFFFFFFF;
@@ -2632,7 +2602,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeNEONRm(insn))));
++OpIdx;
- assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected");
+ assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
// Add the imm operand.
@@ -2762,7 +2732,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Add the imm operand.
unsigned Imm = 0;
@@ -2869,15 +2839,9 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
-static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- assert(0 && "Unreachable code!");
- return false;
-}
-
// Vector Get Lane (move scalar to ARM core register) Instructions.
// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
-static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
@@ -2887,7 +2851,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass == 0 &&
+ OpInfo[2].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
ElemSize esize =
@@ -2911,7 +2875,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Set Lane (move ARM core register to scalar) Instructions.
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
-static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
@@ -2923,7 +2887,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpInfo[1].RegClass == ARM::DPRRegClassID &&
TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
OpInfo[2].RegClass == ARM::GPRRegClassID &&
- OpInfo[3].RegClass == 0 &&
+ OpInfo[3].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
ElemSize esize =
@@ -2950,7 +2914,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Duplicate Instructions (from ARM core register to all elements).
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
-static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -3090,13 +3054,6 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
}
-static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- assert(0 && "Unexpected thumb misc. instruction!");
- return false;
-}
-
/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
/// We divide the disassembly task into different categories, with each one
/// corresponding to a specific instruction encoding format. There could be
@@ -3128,12 +3085,10 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleVFPLdStMulFrm,
&DisassembleVFPMiscFrm,
&DisassembleThumbFrm,
- &DisassembleNEONFrm,
- &DisassembleNEONGetLnFrm,
- &DisassembleNEONSetLnFrm,
- &DisassembleNEONDupFrm,
&DisassembleMiscFrm,
- &DisassembleThumbMiscFrm,
+ &DisassembleNGetLnFrm,
+ &DisassembleNSetLnFrm,
+ &DisassembleNDupFrm,
// VLD and VST (including one lane) Instructions.
&DisassembleNLdSt,
@@ -3233,7 +3188,8 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
// a pair of TargetOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
{
// If we are inside an IT block, get the IT condition bits maintained via
// ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
@@ -3265,7 +3221,8 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
// a pair of TargetOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
{
// If we are inside an IT block, get the IT condition bits maintained via
// ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index b1d90df34177..7d21256a14f9 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
/// Various utilities for checking the target specific flags.
/// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(unsigned TSFlags) {
+static inline bool isUnaryDP(uint64_t TSFlags) {
return (TSFlags & ARMII::UnaryDP);
}
/// This four-bit field describes the addressing mode used.
/// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(unsigned TSFlags) {
+static inline unsigned getAddrMode(uint64_t TSFlags) {
return (TSFlags & ARMII::AddrModeMask);
}
/// {IndexModePre, IndexModePost}
/// Only valid for load and store ops.
/// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(unsigned TSFlags) {
+static inline unsigned getIndexMode(uint64_t TSFlags) {
return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
}
/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(unsigned TSFlags) {
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
return (TSFlags & ARMII::IndexModeMask) != 0;
}
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b2e308248c5..4b7a0bf6fdb9 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -395,7 +395,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRm(insn))));
} else {
- assert(OpInfo[OpIdx].RegClass == 0 &&
+ assert(OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn)
@@ -531,7 +531,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == 0 &&
+ (OpInfo[1].RegClass < 0 &&
!OpInfo[1].isPredicate() &&
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
@@ -598,7 +598,7 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
assert(OpIdx < NumOps && "More operands expected");
- if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() &&
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
!OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0));
@@ -632,7 +632,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass == 0 &&
+ (OpInfo[2].RegClass < 0 &&
!OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
@@ -658,7 +658,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == 0 &&
+ (OpInfo[1].RegClass < 0 &&
!OpInfo[1].isPredicate() &&
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
@@ -685,7 +685,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass == 0 &&
+ (OpInfo[2].RegClass < 0 &&
!OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
@@ -761,7 +761,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
// Predicate operands are handled elsewhere.
if (NumOps == 2 &&
OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
- OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
+ OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
return true;
}
@@ -808,7 +808,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
+ (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
&& "Expect >=2 operands");
// Add the destination operand.
@@ -913,7 +913,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
- assert(NumOps == 3 && OpInfo[0].RegClass == 0 &&
+ assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
&& "Exactly 3 operands expected");
@@ -939,7 +939,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
- assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected");
+ assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
unsigned Imm11 = getT1Imm11(insn);
@@ -1239,7 +1239,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
&& OpInfo[0].RegClass == ARM::GPRRegClassID
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass == ARM::GPRRegClassID
- && OpInfo[3].RegClass == 0
+ && OpInfo[3].RegClass < 0
&& "Expect >= 4 operands and first 3 as reg operands");
// Add the <Rt> <Rt2> operands.
@@ -1322,8 +1322,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps == 4
&& OpInfo[0].RegClass == ARM::GPRRegClassID
&& OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass == 0
- && OpInfo[3].RegClass == 0
+ && OpInfo[2].RegClass < 0
+ && OpInfo[3].RegClass < 0
&& "Exactlt 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -1375,7 +1375,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
if (NumOps == OpIdx)
return true;
- if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()) {
if (Thumb2ShiftOpcode(Opcode))
@@ -1440,7 +1440,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
}
// The modified immediate operand should come next.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 &&
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1555,7 +1555,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
++OpIdx;
}
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1772,7 +1772,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
} else {
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
int Offset = 0;
@@ -1792,7 +1792,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
}
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 &&
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
@@ -1818,7 +1818,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
assert(NumOps >= 2 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == 0 &&
+ OpInfo[1].RegClass < 0 &&
"Expect >= 2 operands, first as reg, and second as imm operand");
// Build the register operand, followed by the (+/-)imm12 immediate.
@@ -1930,7 +1930,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
++OpIdx;
}
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1981,7 +1981,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeRm(insn))));
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Add the rotation amount immediate.
MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
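
A note on the recurring change from OpInfo[...].RegClass == 0 to RegClass < 0 throughout the two disassembler files above: the operand-info tables appear to have switched to a negative sentinel (rather than register-class ID 0) to mean "this operand is not a register", so pure-immediate slots are now detected with a signed comparison. Below is a minimal standalone sketch of the check those call sites share; the struct and helper names are illustrative stand-ins, not LLVM types.

#include <cassert>

// Illustrative stand-in for TargetOperandInfo: a negative RegClass means
// "not a register operand" (e.g. a pure immediate); zero and above are
// valid register-class IDs.
struct OperandInfoSketch {
  int RegClass;
  bool Pred, OptDef;
  bool isPredicate() const { return Pred; }
  bool isOptionalDef() const { return OptDef; }
};

// Hypothetical helper mirroring the check repeated at the call sites above.
static bool isPureImmOperand(const OperandInfoSketch &OI) {
  return OI.RegClass < 0 && !OI.isPredicate() && !OI.isOptionalDef();
}

int main() {
  OperandInfoSketch Imm = { -1, false, false };  // immediate slot
  OperandInfoSketch CC  = {  1, true,  false };  // predicate register slot
  assert(isPureImmOperand(Imm));
  assert(!isPureImmOperand(CC));
  return 0;
}
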
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 0a4400cc7ddd..bbdd3c7f7c3e 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -105,8 +105,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
unsigned MOReg = MO.getReg();
Defs[MOReg] = MI;
- // Catch subregs as well.
- for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R)
+ // Catch aliases as well.
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
Defs[*R] = MI;
}
}
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index a7258985e104..f67717cdd56f 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -407,7 +407,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
"expected a virtual register");
// Extracting from a Q or QQ register.
MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
- if (!DefMI || !DefMI->isExtractSubreg())
+ if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg())
return false;
VirtReg = DefMI->getOperand(1).getReg();
if (LastSrcReg && LastSrcReg != VirtReg)
@@ -418,7 +418,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
RC != ARM::QQPRRegisterClass &&
RC != ARM::QQQQPRRegisterClass)
return false;
- unsigned SubIdx = DefMI->getOperand(2).getImm();
+ unsigned SubIdx = DefMI->getOperand(1).getSubReg();
if (LastSubIdx) {
if (LastSubIdx != SubIdx-Stride)
return false;
@@ -434,22 +434,21 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
// FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
// currently required for correctness. e.g.
- // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+ // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
// %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
// %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
// VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
- // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5
+ // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
// respectively.
// We need to change how we model uses of REG_SEQUENCE.
for (unsigned R = 0; R < NumRegs; ++R) {
MachineOperand &MO = MI->getOperand(FirstOpnd + R);
unsigned OldReg = MO.getReg();
MachineInstr *DefMI = MRI->getVRegDef(OldReg);
- assert(DefMI->isExtractSubreg());
+ assert(DefMI->isCopy());
MO.setReg(LastSrcReg);
MO.setSubReg(SubIds[R]);
- if (R != 0)
- MO.setIsKill(false);
+ MO.setIsKill(false);
// Delete the EXTRACT_SUBREG if its result is now dead.
if (MRI->use_empty(OldReg))
DefMI->eraseFromParent();
@@ -467,43 +466,9 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
unsigned FirstOpnd, NumRegs, Offset, Stride;
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- if (llvm::ModelWithRegSequence() &&
- FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
+ if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
-
- MachineBasicBlock::iterator NextI = llvm::next(MBBI);
- for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
-
- // For now, just assign a fixed set of adjacent registers.
- // This leaves plenty of room for future improvements.
- static const unsigned NEONDRegs[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7
- };
- MO.setReg(NEONDRegs[Offset + R * Stride]);
-
- if (MO.isUse()) {
- // Insert a copy from VirtReg.
- TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
- ARM::DPRRegisterClass, ARM::DPRRegisterClass,
- DebugLoc());
- if (MO.isKill()) {
- MachineInstr *CopyMI = prior(MBBI);
- CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
- }
- MO.setIsKill();
- } else if (MO.isDef() && !MO.isDead()) {
- // Add a copy to VirtReg.
- TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
- ARM::DPRRegisterClass, ARM::DPRRegisterClass,
- DebugLoc());
- }
- }
+ llvm_unreachable("expected a REG_SEQUENCE");
}
return Modified;
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index fae84d4ee022..af630ac797c5 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -33,64 +33,24 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
- }
- }
-
- return false;
-}
-
-bool Thumb1InstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVgpr2gpr: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- return false;
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- return false;
- }
- return true;
- }
- }
-
- return false;
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
}
void Thumb1InstrInfo::
@@ -175,10 +135,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
isKill = false;
}
- if (isKill) {
+ if (isKill)
MBB.addLiveIn(Reg);
- MIB.addReg(Reg, RegState::Kill);
- }
+
+ MIB.addReg(Reg, getKillRegState(isKill));
}
return true;
}
@@ -221,46 +181,3 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-
-MachineInstr *Thumb1InstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVgpr2gpr: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- break;
- NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- break;
- bool isDead = MI->getOperand(0).isDead();
- NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
- .addReg(DstReg,
- RegState::Define | getDeadRegState(isDead))
- .addFrameIndex(FI).addImm(0));
- }
- break;
- }
- }
-
- return NewMI;
-}
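
The copyRegToReg-to-copyPhysReg rewrite above (and the matching one in Thumb2InstrInfo.cpp further down) selects a MOV opcode purely from whether the destination and source are low (tGPR) registers. Below is a standalone sketch of that selection table, with string placeholders standing in for the ARM::tMOV* opcode enums.

#include <cassert>
#include <string>

// Placeholder opcode names; in the real code these are ARM::tMOVr,
// ARM::tMOVtgpr2gpr, ARM::tMOVgpr2tgpr and ARM::tMOVgpr2gpr.
static std::string pickCopyOpcode(bool DestIsLow, bool SrcIsLow) {
  if (DestIsLow && SrcIsLow) return "tMOVr";        // low  <- low
  if (SrcIsLow)              return "tMOVtgpr2gpr"; // high <- low
  if (DestIsLow)             return "tMOVgpr2tgpr"; // low  <- high
  return "tMOVgpr2gpr";                             // high <- high
}

int main() {
  assert(pickCopyOpcode(true,  true)  == "tMOVr");
  assert(pickCopyOpcode(false, true)  == "tMOVtgpr2gpr");
  assert(pickCopyOpcode(true,  false) == "tMOVgpr2tgpr");
  assert(pickCopyOpcode(false, false) == "tMOVgpr2gpr");
  return 0;
}
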
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index c937296bbee1..555135a8b76c 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -46,12 +46,10 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -64,20 +62,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
};
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 2f635fe50ad5..39b70b43b23f 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,21 +68,6 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-const TargetRegisterClass*
-Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const {
- if (isARMLowRegister(Reg))
- return ARM::tGPRRegisterClass;
- switch (Reg) {
- default:
- break;
- case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
- case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC:
- return ARM::GPRRegisterClass;
- }
-
- return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
-}
-
bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
@@ -410,6 +395,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// before that instead and adjust the UseMI.
bool done = false;
for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
// If this instruction affects R12, adjust our restore point.
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = II->getOperand(i);
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 4eca3673391f..9a0308afa20c 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,9 +38,6 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- const TargetRegisterClass *
- getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const;
-
bool hasReservedCallFrame(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -51,7 +48,8 @@ public:
// could not be handled directly in MI.
int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const;
+ unsigned MOVOpc, unsigned ADDriOpc,
+ unsigned SUBriOpc) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
new file mode 100644
index 000000000000..172908da228a
--- /dev/null
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
@@ -0,0 +1,53 @@
+//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "Thumb2HazardRecognizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+using namespace llvm;
+
+ScheduleHazardRecognizer::HazardType
+Thumb2HazardRecognizer::getHazardType(SUnit *SU) {
+ if (ITBlockSize) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+ }
+
+ return PostRAHazardRecognizer::getHazardType(SU);
+}
+
+void Thumb2HazardRecognizer::Reset() {
+ ITBlockSize = 0;
+ PostRAHazardRecognizer::Reset();
+}
+
+void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ PostRAHazardRecognizer::EmitInstruction(SU);
+}
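
The IT-block bookkeeping in EmitInstruction above recovers the block length from the mask operand of t2IT: the number of trailing zero bits in the mask fixes the stop-bit position, so the block covers 4 minus that count instructions. A small worked example under that reading follows; the helper is a portable stand-in for CountTrailingZeros_32.

#include <cassert>

// Portable stand-in for CountTrailingZeros_32, for this example only.
static unsigned countTrailingZeros32(unsigned V) {
  if (V == 0) return 32;
  unsigned N = 0;
  while ((V & 1u) == 0) { V >>= 1; ++N; }
  return N;
}

int main() {
  // Example t2IT mask operands (low 4 bits shown): the trailing-zero count
  // gives the stop-bit position, so block size = 4 - trailing zeros.
  assert(4 - countTrailingZeros32(0x8) == 1);  // 0b1000 -> 1 instruction
  assert(4 - countTrailingZeros32(0x4) == 2);  // 0b0100 -> 2 instructions
  assert(4 - countTrailingZeros32(0xA) == 3);  // 0b1010 -> 3 instructions
  assert(4 - countTrailingZeros32(0x1) == 4);  // 0b0001 -> 4 instructions
  return 0;
}
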
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h
new file mode 100644
index 000000000000..472665862e41
--- /dev/null
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.h
@@ -0,0 +1,40 @@
+//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling Thumb2 functions on
+// ARM processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2HAZARDRECOGNIZER_H
+#define THUMB2HAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+ PostRAHazardRecognizer(ItinData) {}
+
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+};
+
+
+} // end namespace llvm
+
+#endif // THUMB2HAZARDRECOGNIZER_H
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f36d4ef7567e..cd15bbed9f23 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -14,17 +14,23 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
- struct Thumb2ITBlockPass : public MachineFunctionPass {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -34,61 +40,167 @@ namespace {
}
private:
- bool InsertITBlocks(MachineBasicBlock &MBB);
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
}
-static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
- return ARMCC::AL;
- return llvm::getInstrPredicate(MI, PredReg);
+/// TrackDefUses - Track which registers are defined and used by instructions
+/// in the IT block. This also tracks "dependencies", i.e. uses in the IT
+/// block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
+ "Sub-register indices still around?");
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ }
+ return false;
}
-bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Modified = false;
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getPredicate(MI, PredReg);
-
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
}
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
// Insert an IT instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
.addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
++MBBI;
- // Finalize IT mask.
+ // Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
- while (MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) {
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
MachineInstr *NMI = &*MBBI;
MI = NMI;
- DebugLoc ndl = NMI->getDebugLoc();
+
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC)
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
- else
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
--Pos;
- ++MBBI;
}
+
+ // Finalize IT mask.
Mask |= (1 << Pos);
// Tag along (firstcond[0] << 4) with the mask.
Mask |= (CC & 1) << 4;
MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
Modified = true;
++NumITs;
}
@@ -100,17 +212,21 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
if (!AFI->isThumbFunction())
return false;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
MachineBasicBlock &MBB = *MFI;
- Modified |= InsertITBlocks(MBB);
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
}
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
return Modified;
}
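
To make the mask construction in InsertITInstructions above concrete, here is a standalone walk-through for a three-instruction block predicated EQ, NE, EQ, assuming the usual ARMCC numbering where EQ is 0 and NE is 1: each trailing instruction contributes its condition's low bit at the current position, the stop bit lands one position lower, and firstcond[0] is tagged into bit 4.

#include <cassert>

int main() {
  // Assumed condition-code encodings (ARMCC::EQ == 0, ARMCC::NE == 1).
  const unsigned EQ = 0, NE = 1;

  unsigned CC = EQ;               // predicate of the first instruction
  unsigned Mask = 0, Pos = 3;

  // Second instruction is predicated NE (the opposite condition).
  Mask |= (NE & 1) << Pos; --Pos; // Mask = 0b1000, Pos = 2
  // Third instruction is predicated EQ again.
  Mask |= (EQ & 1) << Pos; --Pos; // Mask unchanged, Pos = 1

  Mask |= (1 << Pos);             // stop bit:       Mask = 0b1010
  Mask |= (CC & 1) << 4;          // firstcond[0]:   Mask = 0b01010

  assert(Mask == 0xA);            // the ARM encoding of an ITET EQ block
  return 0;
}
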
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 531d5e9f147e..ee517279c9d7 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,15 +17,27 @@
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
+#include "Thumb2HazardRecognizer.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
-#include "Thumb2InstrInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<unsigned>
+IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
+ cl::desc("Thumb2 if-conversion limit (default 3)"),
+ cl::init(3));
+
+static cl::opt<unsigned>
+IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
+ cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
+ cl::init(3));
+
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
@@ -35,33 +47,99 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool
-Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
}
+
+ // Control flow can reach here if branch folding is run before the IT
+ // block formation pass.
}
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const {
+ return NumInstrs && NumInstrs <= IfCvtLimit;
+}
+
+bool Thumb2InstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF) const {
+ // FIXME: Catch optimization such as:
+ // r0 = movne
+ // r0 = moveq
+ return NumT && NumF &&
+ NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit);
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
// Handle SPR, DPR, and QPR copies.
- return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
void Thumb2InstrInfo::
@@ -69,7 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -94,7 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -113,6 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
+ScheduleHazardRecognizer *Thumb2InstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+ return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
@@ -131,14 +216,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// Use a movw to materialize the 16-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
.addImm(NumBytes)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
Fits = true;
} else if ((NumBytes & 0xffff) == 0) {
// Use a movt to materialize the 32-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
.addReg(DestReg)
.addImm(NumBytes >> 16)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
Fits = true;
}
@@ -502,3 +587,54 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+ SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI which can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
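
For the IT fix-up in ReplaceTailWithBranchTo above, the MaskOn/MaskOff arithmetic shortens an existing IT block to the predicated instructions that survive in front of the new branch: the backwards walk leaves Count at the position where the new stop bit belongs, and all lower mask bits are cleared. Here is a worked instance, continuing the three-instruction ITET EQ example (mask operand 0b01010) with only its first two instructions surviving.

#include <cassert>

int main() {
  unsigned Mask  = 0xA;            // 0b01010: three-instruction ITET EQ block
  unsigned Count = 2;              // two predicated instructions survive

  unsigned MaskOn  = 1u << Count;  // new stop bit at position 2
  unsigned MaskOff = ~(MaskOn - 1);// clears the bits below the stop bit
  unsigned NewMask = (Mask & MaskOff) | MaskOn;

  assert(NewMask == 0xC);          // 0b01100: now a two-instruction ITE EQ block
  return 0;
}
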
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 29487700d19b..3a9f8b194d3c 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
#include "Thumb2RegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
Thumb2RegisterInfo RI;
@@ -31,12 +32,21 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
+ MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -50,12 +60,27 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-address instruction inserted by the two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
}
#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8fe2e42a7cdd..ba392f36d946 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -451,11 +451,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
- const TargetInstrDesc &TID = MI->getDesc();
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1)
- return false;
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
return false;
if (Entry.Imm2Limit) {
@@ -484,6 +491,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool HasCC = false;
bool CCDead = false;
+ const TargetInstrDesc &TID = MI->getDesc();
if (TID.hasOptionalDef()) {
unsigned NumOps = TID.getNumOperands();
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
@@ -689,7 +697,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
goto ProcessNext;
}
- // Try to transform ro a 16-bit non-two-address instruction.
+ // Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
Modified = true;
MachineBasicBlock::iterator I = prior(NextMII);
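
As an illustration of the new commuting path in ReduceTo2Addr above: a 32-bit add such as r0 = t2ADDrr r1, r0 is not two-address shaped as written (the destination matches the second source, not the first), but findCommutedOpIndices reports that operands 1 and 2 may swap, commuteInstruction rewrites it as r0 = t2ADDrr r0, r1, and the entry can then be narrowed to the 16-bit two-address form. The register numbers and the choice of t2ADDrr here are only an illustrative example, not taken from the patch itself.
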
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 1d85f12c3ef9..ea78bf374200 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -224,6 +224,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -251,7 +252,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -425,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
}
} else { //more args
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false);
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -444,7 +445,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
- int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
if (i == 0) FuncInfo->setVarArgsBase(FI);
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
@@ -453,7 +454,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
- FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
false, false, 0));
@@ -470,6 +471,7 @@ SDValue
AlphaTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
@@ -483,7 +485,7 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
break;
//return SDValue(); // ret void is legal
case 1: {
- EVT ArgVT = Outs[0].Val.getValueType();
+ EVT ArgVT = Outs[0].VT;
unsigned ArgReg;
if (ArgVT.isInteger())
ArgReg = Alpha::R0;
@@ -492,13 +494,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
ArgReg = Alpha::F0;
}
Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
- Outs[0].Val, Copy.getValue(1));
+ OutVals[0], Copy.getValue(1));
if (DAG.getMachineFunction().getRegInfo().liveout_empty())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
break;
}
case 2: {
- EVT ArgVT = Outs[0].Val.getValueType();
+ EVT ArgVT = Outs[0].VT;
unsigned ArgReg1, ArgReg2;
if (ArgVT.isInteger()) {
ArgReg1 = Alpha::R0;
@@ -509,13 +511,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
ArgReg2 = Alpha::F1;
}
Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
- Outs[0].Val, Copy.getValue(1));
+ OutVals[0], Copy.getValue(1));
if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
== DAG.getMachineFunction().getRegInfo().liveout_end())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
- Outs[1].Val, Copy.getValue(1));
+ OutVals[1], Copy.getValue(1));
if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
== DAG.getMachineFunction().getRegInfo().liveout_end())
@@ -539,7 +541,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
false, false, 0);
SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
- SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1),
Tmp, NULL, 0, MVT::i32, false, false, 0);
DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
if (N->getValueType(0).isFloatingPoint())
@@ -643,10 +645,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
case ISD::GlobalAddress: {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
+ GSDN->getOffset());
// FIXME there isn't really any debug info here
- // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) {
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
+ // && !GV->hasLinkOnceLinkage()) {
if (GV->hasLocalLinkage()) {
SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
@@ -702,7 +706,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
SDValue Result;
if (Op.getValueType() == MVT::i32)
- Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr,
NULL, 0, MVT::i32, false, false, 0);
else
Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0,
@@ -722,7 +726,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
false, false, 0);
SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
DAG.getConstant(8, MVT::i64));
- Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result,
NP, NULL,0, MVT::i32, false, false, 0);
SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
DAG.getConstant(8, MVT::i64));
@@ -863,7 +867,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- sinkMBB->transferSuccessors(thisMBB);
+ sinkMBB->splice(sinkMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
F->insert(It, llscMBB);
F->insert(It, sinkMBB);
@@ -912,7 +919,7 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
thisMBB->addSuccessor(llscMBB);
llscMBB->addSuccessor(llscMBB);
llscMBB->addSuccessor(sinkMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 7ee823a86a4d..46e0c7dc9f80 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -121,6 +121,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -129,6 +130,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
index d98455628887..6f4ebf279643 100644
--- a/lib/Target/Alpha/AlphaInstrFormats.td
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -182,7 +182,7 @@ class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Inst
bits<5> Rb;
bits<7> Function = fun;
-// let isTwoAddress = 1;
+// let Constraints = "$RFALSE = $RDEST";
let Inst{25-21} = Ra;
let Inst{20-16} = Rb;
let Inst{15-13} = 0;
@@ -223,7 +223,7 @@ class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Ins
bits<8> LIT;
bits<7> Function = fun;
-// let isTwoAddress = 1;
+// let Constraints = "$RFALSE = $RDEST";
let Inst{25-21} = Ra;
let Inst{20-13} = LIT;
let Inst{12} = 1;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 3aba3639a5f3..ad625a269417 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -110,9 +110,8 @@ static bool isAlphaIntCondCode(unsigned Opcode) {
unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"Alpha branch conditions have two components!");
@@ -120,58 +119,47 @@ unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
- BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
else // Conditional branch
if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
return 2;
}
-bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == Alpha::GPRCRegisterClass) {
+void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
.addReg(SrcReg)
- .addReg(SrcReg);
- } else if (DestRC == Alpha::F4RCRegisterClass) {
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
.addReg(SrcReg)
- .addReg(SrcReg);
- } else if (DestRC == Alpha::F8RCRegisterClass) {
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
.addReg(SrcReg)
- .addReg(SrcReg);
+ .addReg(SrcReg, getKillRegState(KillSrc));
} else {
- // Attempt to copy register that is not GPR or FPR
- return false;
+ llvm_unreachable("Attempt to copy register that is not GPR or FPR");
}
-
- return true;
}
void
@@ -227,51 +215,6 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
llvm_unreachable("Unhandled register class");
}
-MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
- if (Ops.size() != 1) return NULL;
-
- // Make sure this is a reg-reg copy.
- unsigned Opc = MI->getOpcode();
-
- MachineInstr *NewMI = NULL;
- switch(Opc) {
- default:
- break;
- case Alpha::BISr:
- case Alpha::CPYSS:
- case Alpha::CPYST:
- if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- if (Ops[0] == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
- ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addFrameIndex(FrameIndex)
- .addReg(Alpha::F31);
- } else { // load -> move
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
- ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(OutReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addFrameIndex(FrameIndex)
- .addReg(Alpha::F31);
- }
- }
- break;
- }
- return NewMI;
-}
-
static unsigned AlphaRevCondCode(unsigned Opcode) {
switch (Opcode) {
case Alpha::BEQ: return Alpha::BNE;
@@ -428,11 +371,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global base register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Alpha::R29);
RegInfo.addLiveIn(Alpha::R29);
AlphaFI->setGlobalBaseReg(GlobalBaseReg);
@@ -456,11 +396,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global return address register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalRetAddr).addReg(Alpha::R26);
RegInfo.addLiveIn(Alpha::R26);
AlphaFI->setGlobalRetAddr(GlobalRetAddr);
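
Two related interface changes show up in this file and recur in the other targets below: the fallible copyRegToReg hook, which needed both register classes, is replaced by copyPhysReg, which copies physical registers unconditionally and takes the kill state explicitly; and copies of virtual registers are now emitted as the target-independent COPY pseudo, which later passes lower through copyPhysReg. A condensed sketch of the two sides, using only opcodes already shown above (everything else is illustrative):

    // Target hook: physical registers only, cannot fail.
    void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI, DebugLoc DL,
                                     unsigned DestReg, unsigned SrcReg,
                                     bool KillSrc) const {
      if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
        BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
        return;
      }
      llvm_unreachable("Attempt to copy register that is not GPR or FPR");
    }

    // Caller side: a copy into a virtual register is now just a COPY.
    BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), GlobalBaseReg)
      .addReg(Alpha::R29);
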
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index 7d7365b75bae..e20e8323b64e 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -42,14 +42,13 @@ public:
int &FrameIndex) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -62,18 +61,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index a47a29b4255a..92de78a364ba 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -680,18 +680,32 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
}
//conditional moves, floats
-let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
- isTwoAddress = 1 in {
-def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero
-def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero
-def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero
-def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero
-def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero
-def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero
+let OutOperandList = (outs F4RC:$RDEST),
+ InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQS : FPForm<0x17, 0x02A,
+ "fcmoveq $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D,
+ "fcmovge $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F,
+ "fcmovgt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E,
+ "fcmovle $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C,
+ "fcmovlt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B,
+ "fcmovne $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if != zero
}
//conditional moves, doubles
-let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
- isTwoAddress = 1 in {
+let OutOperandList = (outs F8RC:$RDEST),
+ InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index c083d8c30b16..dc9d935ec047 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -74,20 +74,6 @@ const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0
- };
- return CalleeSavedRegClasses;
-}
-
BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(Alpha::R15);
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 720367aa87dc..f9fd87a63737 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -30,9 +30,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index b4da96cba59e..80ee1075aada 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -132,8 +132,8 @@ static void UpdateNodeOperand(SelectionDAG &DAG,
SDValue Val) {
SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
ops[Num] = Val;
- SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size());
- DAG.ReplaceAllUsesWith(N, New.getNode());
+ SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
+ DAG.ReplaceAllUsesWith(N, New);
}
// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index adf211881870..6e828e1b36b3 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -143,7 +143,7 @@ SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
DebugLoc DL = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Op = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
}
@@ -205,8 +205,7 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
unsigned ObjSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
false, false, 0));
@@ -220,6 +219,7 @@ SDValue
BlackfinTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
@@ -245,7 +245,7 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Opi = Outs[i].Val;
+ SDValue Opi = OutVals[i];
// Expand to i32 if necessary
switch (VA.getLocInfo()) {
@@ -278,6 +278,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -301,7 +302,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -357,7 +358,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index a7842482687f..6bebcc320ce9 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -63,6 +63,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -71,6 +72,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index 73924b750a1c..a74d42d59549 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -104,10 +104,8 @@ unsigned BlackfinInstrInfo::
InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc DL;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -124,69 +122,73 @@ InsertBranch(MachineBasicBlock &MBB,
llvm_unreachable("Implement conditional branches!");
}
-static bool inClass(const TargetRegisterClass &Test,
- unsigned Reg,
- const TargetRegisterClass *RC) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return Test.contains(Reg);
- else
- return &Test==RC || Test.hasSubClass(RC);
-}
-
-bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (inClass(BF::ALLRegClass, DestReg, DestRC) &&
- inClass(BF::ALLRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg);
- return true;
+void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- if (inClass(BF::D16RegClass, DestReg, DestRC) &&
- inClass(BF::D16RegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0);
- return true;
+ if (BF::D16RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
}
- if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) &&
- inClass(BF::DRegClass, DestReg, DestRC)) {
- if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg).addReg(SrcReg);
+ if (BF::DRegClass.contains(DestReg)) {
+ if (SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
- } else {
- BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- return true;
}
- if (inClass(BF::AnyCCRegClass, DestReg, DestRC) &&
- inClass(BF::DRegClass, SrcReg, SrcRC)) {
- if (inClass(BF::NotCCRegClass, DestReg, DestRC))
- BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg).addReg(SrcReg);
- else
- BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg).addReg(SrcReg);
- return true;
+ if (BF::DRegClass.contains(SrcReg)) {
+ if (DestReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+ return;
+ }
+ if (DestReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
}
- if (inClass(BF::NotCCRegClass, DestReg, DestRC) &&
- inClass(BF::JustCCRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg);
- return true;
+
+ if (DestReg == BF::NCC && SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- if (inClass(BF::JustCCRegClass, DestReg, DestRC) &&
- inClass(BF::NotCCRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg);
- return true;
+ if (DestReg == BF::CC && SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+
- SrcRC->getName() + " -> " + DestRC->getName()).c_str());
- return false;
+ llvm_unreachable("Bad reg-to-reg copy");
+}
+
+static bool inClass(const TargetRegisterClass &Test,
+ unsigned Reg,
+ const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Test.contains(Reg);
+ else
+ return &Test==RC || Test.hasSubClass(RC);
}
void
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index c1dcd58ef5ed..6c3591707269 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -44,14 +44,13 @@ namespace llvm {
InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 5cf350a0cb4f..8034a7fd0fc2 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -488,7 +488,7 @@ def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
"$dst = $src;",
[]>;
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
"if $cc $dst = $src2;",
[(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
@@ -645,7 +645,7 @@ def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
// Table C-15. Bit Operations Instructions
//===----------------------------------------------------------------------===//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
"bitclr($dst, $src2);",
[(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
@@ -691,7 +691,7 @@ multiclass SHIFT32<SDNode opnode, string ops> {
}
let Defs = [AZ, AN, V, VS],
- isTwoAddress = 1 in {
+ Constraints = "$src = $dst" in {
defm SRA : SHIFT32<sra, ">>>">;
defm SRL : SHIFT32<srl, ">>">;
defm SLL : SHIFT32<shl, "<<">;
@@ -748,7 +748,7 @@ def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
"$dst = $src1 + $src2;",
[(set D16:$dst, (add D16:$src1, D16:$src2))]>;
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
"$dst += $src2;",
[(set D:$dst, (add D:$src1, imm7:$src2))]>;
@@ -775,7 +775,7 @@ def NEG: F1<(outs D:$dst), (ins D:$src),
def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
"$dst = $src1 + $src2;", []>;
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
"$dst += $src2;", []>;
@@ -802,7 +802,7 @@ def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
}
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
"$dst *= $src2;",
[(set D:$dst, (mul D:$src1, D:$src2))]>;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 5153aceb8f73..06e95de1587f 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -48,17 +48,6 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-const TargetRegisterClass* const *BlackfinRegisterInfo::
-getCalleeSavedRegClasses(const MachineFunction *MF) const {
- using namespace BF;
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &PRegClass,
- &DRegClass, &DRegClass, &DRegClass, &DRegClass,
- &PRegClass, &PRegClass, &PRegClass,
- 0 };
- return CalleeSavedRegClasses;
-}
-
BitVector
BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
using namespace BF;
@@ -86,25 +75,6 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass*
-BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
- assert(isPhysicalRegister(reg) && "reg must be a physical register");
-
- // Pick the smallest register class of the right type that contains
- // this physreg.
- const TargetRegisterClass* BestRC = 0;
- for (regclass_iterator I = regclass_begin(), E = regclass_end();
- I != E; ++I) {
- const TargetRegisterClass* RC = *I;
- if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || RC->getNumRegs() < BestRC->getNumRegs()))
- BestRC = RC;
- }
-
- assert(BestRC && "Couldn't find the register class");
- return BestRC;
-}
-
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 03c54507ff6e..ead0b4a73c83 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -33,9 +33,6 @@ namespace llvm {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
// getSubReg implemented by tablegen
@@ -44,9 +41,6 @@ namespace llvm {
return &BF::PRegClass;
}
- const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg,
- EVT VT) const;
-
bool hasFP(const MachineFunction &MF) const;
// bool hasReservedCallFrame(MachineFunction &MF) const;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 55b8aaa4179f..e8d8474b5be8 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -264,7 +264,7 @@ namespace {
//
static const AllocaInst *isDirectAlloca(const Value *V) {
const AllocaInst *AI = dyn_cast<AllocaInst>(V);
- if (!AI) return false;
+ if (!AI) return 0;
if (AI->isArrayAllocation())
return 0; // FIXME: we can also inline fixed size array allocas!
if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
@@ -2889,7 +2889,7 @@ void CWriter::visitCallInst(CallInst &I) {
bool hasByVal = I.hasByValArgument();
bool isStructRet = I.hasStructRetAttr();
if (isStructRet) {
- writeOperandDeref(I.getOperand(1));
+ writeOperandDeref(I.getArgOperand(0));
Out << " = ";
}
@@ -2944,8 +2944,8 @@ void CWriter::visitCallInst(CallInst &I) {
}
unsigned NumDeclaredParams = FTy->getNumParams();
-
- CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
unsigned ArgNo = 0;
if (isStructRet) { // Skip struct return argument.
++AI;
@@ -2999,7 +2999,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << "0; ";
Out << "va_start(*(va_list*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
// Output the last argument to the enclosing function.
if (I.getParent()->getParent()->arg_empty())
@@ -3009,9 +3009,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << ')';
return true;
case Intrinsic::vaend:
- if (!isa<ConstantPointerNull>(I.getOperand(1))) {
+ if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
Out << "0; va_end(*(va_list*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
} else {
Out << "va_end(*(va_list*)0)";
@@ -3020,47 +3020,47 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
case Intrinsic::vacopy:
Out << "0; ";
Out << "va_copy(*(va_list*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", *(va_list*)";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ')';
return true;
case Intrinsic::returnaddress:
Out << "__builtin_return_address(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
return true;
case Intrinsic::frameaddress:
Out << "__builtin_frame_address(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
return true;
case Intrinsic::powi:
Out << "__builtin_powi(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ')';
return true;
case Intrinsic::setjmp:
Out << "setjmp(*(jmp_buf*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
return true;
case Intrinsic::longjmp:
Out << "longjmp(*(jmp_buf*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ')';
return true;
case Intrinsic::prefetch:
Out << "LLVM_PREFETCH((const void *)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ", ";
- writeOperand(I.getOperand(3));
+ writeOperand(I.getArgOperand(2));
Out << ")";
return true;
case Intrinsic::stacksave:
@@ -3077,7 +3077,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
printType(Out, I.getType());
Out << ')';
// Multiple GCC builtins multiplex onto this intrinsic.
- switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
+ switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
case 0: Out << "__builtin_ia32_cmpeq"; break;
case 1: Out << "__builtin_ia32_cmplt"; break;
@@ -3098,9 +3098,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << 'd';
Out << "(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ")";
return true;
case Intrinsic::ppc_altivec_lvsl:
@@ -3108,7 +3108,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
printType(Out, I.getType());
Out << ')';
Out << "__builtin_altivec_lvsl(0, (void*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ")";
return true;
}
@@ -3221,7 +3221,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
DestVal = ResultVals[ValueCount].first;
DestValNo = ResultVals[ValueCount].second;
} else
- DestVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+ DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
if (I->isEarlyClobber)
C = "&"+C;
@@ -3255,7 +3255,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
}
assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
- Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+ Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
Out << "\"" << C << "\"(";
if (!I->isIndirect)
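
Every CBackend change above is the same mechanical substitution: call arguments are read through CallInst::getArgOperand(i), which hides where the callee sits in the operand list, instead of the old getOperand(i+1) indexing. A self-contained sketch of walking a call's arguments both ways (the helper name is invented; header paths are those used by trees of this vintage):

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    using namespace llvm;

    // Hypothetical helper: visit every actual argument of a call.
    static void visitCallArgs(CallInst &I) {
      for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
        Value *Arg = I.getArgOperand(i);   // was I.getOperand(i + 1)
        (void)Arg;                         // ...process Arg...
      }
      // CallSite gives the same argument view for calls and invokes alike:
      CallSite CS(&I);
      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
           AI != AE; ++AI)
        (void)*AI;
    }
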
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index 10dc837d90b7..ec2f663908f6 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -34,76 +34,19 @@ def RetCC_SPU : CallingConv<[
//===----------------------------------------------------------------------===//
// CellSPU Argument Calling Conventions
-// (note: this isn't used, but presumably should be at some point when other
-// targets do.)
//===----------------------------------------------------------------------===//
-/*
-def CC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
-
+def CCC_SPU : CallingConv<[
+ CCIfType<[i8, i16, i32, i64, i128, f32, f64,
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
@@ -112,4 +55,3 @@ def CC_SPU : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToStack<16, 16>>
]>;
-*/
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
index e8ca333f0b69..f511acd64954 100644
--- a/lib/Target/CellSPU/SPUFrameInfo.h
+++ b/lib/Target/CellSPU/SPUFrameInfo.h
@@ -53,10 +53,6 @@ namespace llvm {
static int minStackSize() {
return (2 * stackSlotSize());
}
- //! Frame size required to spill all registers plus frame info
- static int fullSpillSize() {
- return (SPURegisterInfo::getNumArgRegs() * stackSlotSize());
- }
//! Convert frame index to stack offset
static int FItoStackOffset(int frame_index) {
return frame_index * stackSlotSize();
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9afdb2b97f30..9b8c2ddd0635 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -275,7 +275,6 @@ namespace {
SDNode *emitBuildVector(SDNode *bvNode) {
EVT vecVT = bvNode->getValueType(0);
- EVT eltVT = vecVT.getVectorElementType();
DebugLoc dl = bvNode->getDebugLoc();
// Check to see if this vector can be represented as a CellSPU immediate
@@ -606,18 +605,14 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
Base = CurDAG->getTargetConstant(0, N.getValueType());
Index = N;
return true;
- } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
+  } else if (Opc == ISD::Register ||
+             Opc == ISD::CopyFromReg ||
+             Opc == ISD::UNDEF) {
unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
// Direct load/store without getelementptr
- SDValue Addr, Offs;
-
- // Get the register from CopyFromReg
- if (Opc == ISD::CopyFromReg)
- Addr = N.getOperand(1);
- else
- Addr = N; // Register
+ SDValue Offs;
Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
@@ -626,7 +621,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
Base = Offs;
- Index = Addr;
+ Index = N;
return true;
}
} else {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 081e8d0db0ee..ece19b9b89f6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -953,7 +953,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there is no actual debug info here
@@ -1013,22 +1014,26 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
-
unsigned ArgOffset = SPUFrameInfo::minStackSize();
unsigned ArgRegIdx = 0;
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
// Add DAG nodes to load the arguments or copy them out of registers.
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
- if (ArgRegIdx < NumArgRegs) {
+ if (VA.isRegLoc()) {
const TargetRegisterClass *ArgRegClass;
switch (ObjectVT.getSimpleVT().SimpleTy) {
@@ -1067,14 +1072,14 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++ArgRegIdx;
} else {
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
ArgOffset += StackSlotSize;
@@ -1087,16 +1092,31 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// vararg handling:
if (isVarArg) {
- // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const unsigned ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+    // Number of entries in ArgRegs (R3 through R79 inclusive).
+    const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]);
+
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
// Create the frame slot
-
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(StackSlotSize, ArgOffset,
- true, false));
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
@@ -1135,6 +1155,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1144,8 +1165,15 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
// Handy pointer type
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1165,8 +1193,9 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// And the arguments passed on the stack
SmallVector<SDValue, 8> MemOpChains;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
@@ -1180,24 +1209,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case MVT::i32:
case MVT::i64:
case MVT::i128:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
case MVT::f32:
case MVT::f64:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
case MVT::v2i64:
case MVT::v2f64:
case MVT::v4f32:
@@ -1205,7 +1218,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case MVT::v8i16:
case MVT::v16i8:
if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
false, false, 0));
@@ -1249,7 +1262,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
if (!ST->usingLargeMem()) {
// Turn calls to targets that are defined (i.e., have bodies) into BRSL
@@ -1355,6 +1368,7 @@ SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -1376,7 +1390,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
Flag = Chain.getValue(1);
}
@@ -1746,15 +1760,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+  EVT maskVT; // selects which c?d (cbd/chd/cwd/cdd) instruction generates the mask
if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
} else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
} else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
} else
llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
@@ -1786,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
} else {
rotate = false;
}
- } else if (PrevElt == 0) {
+ } else if (i == 0) {
// First time through, need to keep track of previous element
PrevElt = SrcElt;
} else {
@@ -1798,18 +1817,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (EltsFromV2 == 1 && monotonic) {
// Compute mask and shuffle
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Initialize temporary register to 0
- SDValue InitTempReg =
- DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
- // Copy register's contents as index in SHUFFLE_MASK:
- SDValue ShufMaskOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
- DAG.getTargetConstant(V2Elt, MVT::i32),
- DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+
+ // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
+ // R1 ($sp) is used here only as it is guaranteed to have last bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2Elt, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
@@ -2056,14 +2073,19 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+  // Use 0 when the lane to insert to is 'undef'.
+  int64_t Idx = 0;
+  if (IdxOp.getOpcode() != ISD::UNDEF) {
+    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+    Idx = CN->getSExtValue();
+  }
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(CN->getSExtValue(), PtrVT));
+ DAG.getConstant(Idx, PtrVT));
SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
SDValue result =
@@ -2862,7 +2884,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case SPUISD::IndirectAddr: {
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->getZExtValue() == 0) {
+ if (CN != 0 && CN->isNullValue()) {
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
@@ -3056,12 +3078,10 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
- Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
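
The SPU argument-lowering changes above replace the hand-maintained SPU_ArgRegs table with the TableGen-generated CCC_SPU convention driven through CCState. Stripped of the SPU-specific register-class switch, the pattern is roughly the sketch below; CCC_SPU comes from the patch, the remaining locals are assumed to be set up as in the real function, and the in-tree code still tracks ArgOffset by hand rather than using VA.getLocMemOffset():

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (VA.isRegLoc()) {
        // The argument arrived in a register: make it live-in and copy it out.
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, Ins[i].VT));
      } else {
        // Otherwise it lives on the stack: create a fixed frame object and load it.
        int FI = MFI->CreateFixedObject(Ins[i].VT.getSizeInBits() / 8,
                                        VA.getLocMemOffset(), true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(DAG.getLoad(Ins[i].VT, dl, Chain, FIN, NULL, 0,
                                     false, false, 0));
      }
    }
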
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 9ebd442b43c7..6d3c90b7512c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -134,7 +134,6 @@ namespace llvm {
EVT VT) const;
void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -160,6 +159,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -168,6 +168,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 4c53c988d336..69aa0887bd77 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -164,11 +164,9 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
"invalid SPU OR<type>_<vec> or LR instruction!");
- if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
sourceReg = MI.getOperand(1).getReg();
destReg = MI.getOperand(0).getReg();
return true;
- }
break;
}
case SPU::ORv16i8:
@@ -251,40 +249,18 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const
+void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
{
// We support cross register class moves for our aliases, such as R3 in any
// reg class to any other reg class containing R3. This is required because
// we instruction select bitconvert i64 -> f64 as a noop for example, so our
// types have no specific meaning.
- if (DestRC == SPU::R8CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R16CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R32CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R32FPRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R64CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R64FPRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::GPRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::VECREGRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg);
- } else {
- // Attempt to copy unknown/unsupported register class!
- return false;
- }
-
- return true;
+ BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
void
@@ -356,88 +332,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
}
-//! Return true if the specified load or store can be folded
-bool
-SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- // Make sure this is a reg-reg copy.
- unsigned Opc = MI->getOpcode();
-
- switch (Opc) {
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORf32:
- case SPU::ORf64:
- if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg())
- return true;
- break;
- }
-
- return false;
-}
-
-/// foldMemoryOperand - SPU, like PPC, can only fold spills into
-/// copy instructions, turning them into load/store instructions.
-MachineInstr *
-SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const
-{
- if (Ops.size() != 1) return 0;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = 0;
-
- switch (Opc) {
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORf32:
- case SPU::ORf64:
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(),
- get(SPU::STQDr32));
-
- MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef));
- NewMI = addFrameReference(MIB, FrameIndex);
- }
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc));
-
- MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef));
- Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset())
- ? SPU::STQDr32 : SPU::STQXr32;
- NewMI = addFrameReference(MIB, FrameIndex);
- break;
- }
- }
-
- return NewMI;
-}
-
//! Branch analysis
/*!
\note This code was kiped from PPC. There may be more branch analysis for
@@ -554,9 +448,8 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -566,14 +459,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) {
if (Cond.empty()) {
// Unconditional branch
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR));
MIB.addMBB(TBB);
DEBUG(errs() << "Inserted one-way uncond branch: ");
DEBUG((*MIB).dump());
} else {
// Conditional branch
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
DEBUG(errs() << "Inserted one-way cond branch: ");
@@ -581,8 +474,8 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
return 1;
} else {
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
- MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
// Two-way Conditional Branch.
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
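
The other recurring interface change in this patch is that InsertBranch now receives the DebugLoc to use instead of conjuring a default one inside each target. A small sketch of the caller side, assuming TII, Cond and NewTBB are already in scope (the names are illustrative):

    // Prefer a location taken from the block's existing terminator, if any.
    DebugLoc DL;
    MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
    if (Term != MBB.end())
      DL = Term->getDebugLoc();

    TII->RemoveBranch(MBB);
    TII->InsertBranch(MBB, NewTBB, /*FBB=*/0, Cond, DL);  // DL is now explicit
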
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 6dabd7c27273..fbb173318148 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -23,19 +23,6 @@ namespace llvm {
class SPUInstrInfo : public TargetInstrInfoImpl {
SPUTargetMachine &TM;
const SPURegisterInfo RI;
- protected:
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
public:
explicit SPUInstrInfo(SPUTargetMachine &tm);
@@ -56,12 +43,10 @@ namespace llvm {
unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
//! Store a register to a stack slot, based on its register class.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -77,11 +62,6 @@ namespace llvm {
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- //! Return true if the specified load or store can be folded
- virtual
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
//! Reverses a branch's condition, returning false on success.
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -94,8 +74,9 @@ namespace llvm {
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
};
}
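(For reference, the signature changes above move DebugLoc selection to the caller: InsertBranch now receives the DebugLoc as a parameter instead of defaulting to an empty one, and copyPhysReg replaces copyRegToReg with an explicit kill flag. A minimal caller-side sketch, illustrative only; TII, MF, MI, MBB, InsertPt, Cond and the register choices are placeholders, not part of this patch:

    const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();            // the caller now picks the location
    TII->InsertBranch(MBB, TBB, FBB, Cond, DL);
    TII->copyPhysReg(MBB, InsertPt, DL, SPU::R3, SPU::R4, /*KillSrc=*/true);
)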
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 846c7edc02a2..647da3051d3d 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -21,7 +21,7 @@ def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
[SDNPHasChain, SDNPOutFlag]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
//===----------------------------------------------------------------------===//
// Operand constraints:
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index d8937ec5745c..f7cfa42f2a95 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -191,33 +191,6 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
{
}
-// SPU's 128-bit registers used for argument passing:
-static const unsigned SPU_ArgRegs[] = {
- SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
- SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
- SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
- SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
- SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
- SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
- SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
- SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
- SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
- SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
- SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
-};
-
-const unsigned *
-SPURegisterInfo::getArgRegs()
-{
- return SPU_ArgRegs;
-}
-
-unsigned
-SPURegisterInfo::getNumArgRegs()
-{
- return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]);
-}
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
@@ -251,36 +224,6 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
return SPU_CalleeSaveRegs;
}
-const TargetRegisterClass* const*
-SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
-{
- // Cell ABI Calling Convention
- static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = {
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, /* environment pointer */
- &SPU::GPRCRegClass, /* stack pointer */
- &SPU::GPRCRegClass, /* link register */
- 0 /* end */
- };
-
- return SPU_CalleeSaveRegClasses;
-}
-
/*!
R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
generally unused) are the Cell's reserved registers
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 0a7031835696..7a6ae6d43c7e 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -49,10 +49,6 @@ namespace llvm {
//! Return the array of callee-saved registers
virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
- //! Return the register class array of the callee-saved registers
- virtual const TargetRegisterClass* const *
- getCalleeSavedRegClasses(const MachineFunction *MF) const;
-
//! Allow for scavenging, so we can get scratch registers when needed.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
{ return true; }
@@ -90,15 +86,6 @@ namespace llvm {
// New methods added:
//------------------------------------------------------------------------
- //! Return the array of argument passing registers
- /*!
- \note The size of this array is returned by getArgRegsSize().
- */
- static const unsigned *getArgRegs();
-
- //! Return the size of the argument passing register array
- static unsigned getNumArgRegs();
-
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 45a0c84a4f01..145568adcd4a 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -99,11 +99,12 @@ namespace {
ValueSet DefinedValues;
ForwardRefMap ForwardRefs;
bool is_inline;
+ unsigned indent_level;
public:
static char ID;
explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {}
+ ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0) {}
virtual const char *getPassName() const { return "C++ backend"; }
@@ -120,6 +121,11 @@ namespace {
void error(const std::string& msg);
+
+ formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0);
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
private:
void printLinkageType(GlobalValue::LinkageTypes LT);
void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
@@ -153,1857 +159,1856 @@ namespace {
void printModuleBody();
};
+} // end anonymous namespace.
+
+formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
+ Out << '\n';
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ Out.indent(indent_level);
+ return Out;
+}
+
+static inline void sanitize(std::string &str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+}
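(The nl()/in()/out() trio replaces the old file-static helpers: nl() prints a newline followed by the current indentation, while in()/out() bump indent_level up and down. A rough usage sketch, following the same pattern the printing methods below rely on; illustrative only, not new code in this patch:

    Out << '{'; in(); nl(Out);                 // open a block and indent
    Out << "Attrs.push_back(PAWI);"; nl(Out);  // body lines land at the new level
    out(); nl(Out);                            // back out one level
    Out << '}'; nl(Out);                       // close the block
)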
- static unsigned indent_level = 0;
- inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) {
- Out << "\n";
- if (delta >= 0 || indent_level >= unsigned(-delta))
- indent_level += delta;
- for (unsigned i = 0; i < indent_level; ++i)
- Out << " ";
- return Out;
+static std::string getTypePrefix(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ case Type::OpaqueTyID: return "opaque_";
+ default: return "other_";
}
+ return "unknown_";
+}
- inline void in() { indent_level++; }
- inline void out() { if (indent_level >0) indent_level--; }
+// Looks up the type in the symbol table and returns a pointer to its name or
+// a null pointer if it wasn't found. Note that this isn't the same as the
+// Module::getTypeName function which will return an empty string, not a null
+// pointer if the name is not found.
+static const std::string *
+findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
+ TypeSymbolTable::const_iterator TI = ST.begin();
+ TypeSymbolTable::const_iterator TE = ST.end();
+ for (;TI != TE; ++TI)
+ if (TI->second == Ty)
+ return &(TI->first);
+ return 0;
+}
- inline void
- sanitize(std::string& str) {
- for (size_t i = 0; i < str.length(); ++i)
- if (!isalnum(str[i]) && str[i] != '_')
- str[i] = '_';
- }
+void CppWriter::error(const std::string& msg) {
+ report_fatal_error(msg);
+}
- inline std::string
- getTypePrefix(const Type* Ty ) {
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "void_";
- case Type::IntegerTyID:
- return std::string("int") + utostr(cast<IntegerType>(Ty)->getBitWidth()) +
- "_";
- case Type::FloatTyID: return "float_";
- case Type::DoubleTyID: return "double_";
- case Type::LabelTyID: return "label_";
- case Type::FunctionTyID: return "func_";
- case Type::StructTyID: return "struct_";
- case Type::ArrayTyID: return "array_";
- case Type::PointerTyID: return "ptr_";
- case Type::VectorTyID: return "packed_";
- case Type::OpaqueTyID: return "opaque_";
- default: return "other_";
- }
- return "unknown_";
- }
-
- // Looks up the type in the symbol table and returns a pointer to its name or
- // a null pointer if it wasn't found. Note that this isn't the same as the
- // Mode::getTypeName function which will return an empty string, not a null
- // pointer if the name is not found.
- inline const std::string*
- findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
- TypeSymbolTable::const_iterator TI = ST.begin();
- TypeSymbolTable::const_iterator TE = ST.end();
- for (;TI != TE; ++TI)
- if (TI->second == Ty)
- return &(TI->first);
- return 0;
- }
-
- void CppWriter::error(const std::string& msg) {
- report_fatal_error(msg);
- }
-
- // printCFP - Print a floating point constant .. very carefully :)
- // This makes sure that conversion to/from floating yields the same binary
- // result so that we don't lose precision.
- void CppWriter::printCFP(const ConstantFP *CFP) {
- bool ignored;
- APFloat APF = APFloat(CFP->getValueAPF()); // copy
- if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
- Out << "ConstantFP::get(mod->getContext(), ";
- Out << "APFloat(";
+// printCFP - Print a floating point constant .. very carefully :)
+// This makes sure that conversion to/from floating yields the same binary
+// result so that we don't lose precision.
+void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(mod->getContext(), ";
+ Out << "APFloat(";
#if HAVE_PRINTF_A
- char Buffer[100];
- sprintf(Buffer, "%A", APF.convertToDouble());
- if ((!strncmp(Buffer, "0x", 2) ||
- !strncmp(Buffer, "-0x", 3) ||
- !strncmp(Buffer, "+0x", 3)) &&
- APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
- if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
- Out << "BitsToDouble(" << Buffer << ")";
- else
- Out << "BitsToFloat((float)" << Buffer << ")";
- Out << ")";
- } else {
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
#endif
- std::string StrVal = ftostr(CFP->getValueAPF());
-
- while (StrVal[0] == ' ')
- StrVal.erase(StrVal.begin());
-
- // Check to make sure that the stringized number is not some string like
- // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
- if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
- (CFP->isExactlyValue(atof(StrVal.c_str())))) {
- if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
- Out << StrVal;
- else
- Out << StrVal << "f";
- } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
- Out << "BitsToDouble(0x"
- << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
- << "ULL) /* " << StrVal << " */";
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << StrVal;
else
- Out << "BitsToFloat(0x"
- << utohexstr((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue())
- << "U) /* " << StrVal << " */";
- Out << ")";
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
#if HAVE_PRINTF_A
- }
+ }
#endif
- Out << ")";
+ Out << ")";
+}
+
+void CppWriter::printCallingConv(CallingConv::ID cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
}
+}
- void CppWriter::printCallingConv(CallingConv::ID cc){
- // Print the calling convention.
- switch (cc) {
- case CallingConv::C: Out << "CallingConv::C"; break;
- case CallingConv::Fast: Out << "CallingConv::Fast"; break;
- case CallingConv::Cold: Out << "CallingConv::Cold"; break;
- case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
- default: Out << cc; break;
- }
+void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
}
+}
- void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
- switch (LT) {
- case GlobalValue::InternalLinkage:
- Out << "GlobalValue::InternalLinkage"; break;
- case GlobalValue::PrivateLinkage:
- Out << "GlobalValue::PrivateLinkage"; break;
- case GlobalValue::LinkerPrivateLinkage:
- Out << "GlobalValue::LinkerPrivateLinkage"; break;
- case GlobalValue::AvailableExternallyLinkage:
- Out << "GlobalValue::AvailableExternallyLinkage "; break;
- case GlobalValue::LinkOnceAnyLinkage:
- Out << "GlobalValue::LinkOnceAnyLinkage "; break;
- case GlobalValue::LinkOnceODRLinkage:
- Out << "GlobalValue::LinkOnceODRLinkage "; break;
- case GlobalValue::WeakAnyLinkage:
- Out << "GlobalValue::WeakAnyLinkage"; break;
- case GlobalValue::WeakODRLinkage:
- Out << "GlobalValue::WeakODRLinkage"; break;
- case GlobalValue::AppendingLinkage:
- Out << "GlobalValue::AppendingLinkage"; break;
- case GlobalValue::ExternalLinkage:
- Out << "GlobalValue::ExternalLinkage"; break;
- case GlobalValue::DLLImportLinkage:
- Out << "GlobalValue::DLLImportLinkage"; break;
- case GlobalValue::DLLExportLinkage:
- Out << "GlobalValue::DLLExportLinkage"; break;
- case GlobalValue::ExternalWeakLinkage:
- Out << "GlobalValue::ExternalWeakLinkage"; break;
- case GlobalValue::CommonLinkage:
- Out << "GlobalValue::CommonLinkage"; break;
- }
+void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: llvm_unreachable("Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
}
+}
- void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
- switch (VisType) {
- default: llvm_unreachable("Unknown GVar visibility");
- case GlobalValue::DefaultVisibility:
- Out << "GlobalValue::DefaultVisibility";
- break;
- case GlobalValue::HiddenVisibility:
- Out << "GlobalValue::HiddenVisibility";
- break;
- case GlobalValue::ProtectedVisibility:
- Out << "GlobalValue::ProtectedVisibility";
- break;
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
}
}
+}
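(As a worked example of the escaping above, illustrative only: a bell character (0x07) is emitted as \x07, since C/16 is 0 (mapped to '0') and C&15 is 7 (mapped to '7'), while a printable 'a' is copied through unchanged and '"' and '\' are always emitted in the hex form.)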
- // printEscapedString - Print each character of the specified string, escaping
- // it if it is not printable or if it is an escape char.
- void CppWriter::printEscapedString(const std::string &Str) {
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- unsigned char C = Str[i];
- if (isprint(C) && C != '"' && C != '\\') {
- Out << C;
- } else {
- Out << "\\x"
- << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
- << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
- }
+std::string CppWriter::getCppName(const Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ default:
+ error("Invalid primitive type");
+ break;
}
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(mod->getContext())";
}
- std::string CppWriter::getCppName(const Type* Ty) {
- // First, handle the primitive types .. easy
- if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
- case Type::IntegerTyID: {
- unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
- }
- case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
- case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
- case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
- case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
- default:
- error("Invalid primitive type");
- break;
- }
- // shouldn't be returned, but make it sensible
- return "Type::getVoidTy(mod->getContext())";
- }
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
- // Now, see if we've seen the type before and return that
- TypeMap::iterator I = TypeNames.find(Ty);
- if (I != TypeNames.end())
- return I->second;
+ // See if the type has a name in the symbol table and build accordingly
+ const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
+ std::string name;
+ if (tName)
+ name = std::string(prefix) + *tName;
+ else
+ name = std::string(prefix) + utostr(uniqueNum++);
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+}
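(To illustrate the naming scheme, with example values not taken from this patch: an anonymous struct type gets a numbered name such as StructTy_0 (the counter is shared with value names, so numbers are unique rather than consecutive), while a struct registered as "foo.bar" in the type symbol table becomes StructTy_foo_bar, sanitize() having replaced any non-alphanumeric characters with '_'.)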
- // Okay, let's build a new name for this type. Start with a prefix
- const char* prefix = 0;
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: prefix = "FuncTy_"; break;
- case Type::StructTyID: prefix = "StructTy_"; break;
- case Type::ArrayTyID: prefix = "ArrayTy_"; break;
- case Type::PointerTyID: prefix = "PointerTy_"; break;
- case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
- case Type::VectorTyID: prefix = "VectorTy_"; break;
- default: prefix = "OtherTy_"; break; // prevent breakage
- }
+void CppWriter::printCppName(const Type* Ty) {
+ printEscapedString(getCppName(Ty));
+}
- // See if the type has a name in the symboltable and build accordingly
- const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
- std::string name;
- if (tName)
- name = std::string(prefix) + *tName;
- else
- name = std::string(prefix) + utostr(uniqueNum++);
- sanitize(name);
-
- // Save the name
- return TypeNames[Ty] = name;
- }
-
- void CppWriter::printCppName(const Type* Ty) {
- printEscapedString(getCppName(Ty));
- }
-
- std::string CppWriter::getCppName(const Value* val) {
- std::string name;
- ValueMap::iterator I = ValueNames.find(val);
- if (I != ValueNames.end() && I->first == val)
- return I->second;
-
- if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
- name = std::string("gvar_") +
- getTypePrefix(GV->getType()->getElementType());
- } else if (isa<Function>(val)) {
- name = std::string("func_");
- } else if (const Constant* C = dyn_cast<Constant>(val)) {
- name = std::string("const_") + getTypePrefix(C->getType());
- } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
- if (is_inline) {
- unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
- Function::const_arg_iterator(Arg)) + 1;
- name = std::string("arg_") + utostr(argNum);
- NameSet::iterator NI = UsedNames.find(name);
- if (NI != UsedNames.end())
- name += std::string("_") + utostr(uniqueNum++);
- UsedNames.insert(name);
- return ValueNames[val] = name;
- } else {
- name = getTypePrefix(val->getType());
- }
+std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
} else {
name = getTypePrefix(val->getType());
}
- if (val->hasName())
- name += val->getName();
- else
- name += utostr(uniqueNum++);
- sanitize(name);
- NameSet::iterator NI = UsedNames.find(name);
- if (NI != UsedNames.end())
- name += std::string("_") + utostr(uniqueNum++);
- UsedNames.insert(name);
- return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
}
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+}
- void CppWriter::printCppName(const Value* val) {
- printEscapedString(getCppName(val));
- }
+void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+}
- void CppWriter::printAttributes(const AttrListPtr &PAL,
- const std::string &name) {
- Out << "AttrListPtr " << name << "_PAL;";
- nl(Out);
- if (!PAL.isEmpty()) {
- Out << '{'; in(); nl(Out);
- Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
- Out << "AttributeWithIndex PAWI;"; nl(Out);
- for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
- unsigned index = PAL.getSlot(i).Index;
- Attributes attrs = PAL.getSlot(i).Attrs;
- Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
#define HANDLE_ATTR(X) \
- if (attrs & Attribute::X) \
- Out << " | Attribute::" #X; \
- attrs &= ~Attribute::X;
-
- HANDLE_ATTR(SExt);
- HANDLE_ATTR(ZExt);
- HANDLE_ATTR(NoReturn);
- HANDLE_ATTR(InReg);
- HANDLE_ATTR(StructRet);
- HANDLE_ATTR(NoUnwind);
- HANDLE_ATTR(NoAlias);
- HANDLE_ATTR(ByVal);
- HANDLE_ATTR(Nest);
- HANDLE_ATTR(ReadNone);
- HANDLE_ATTR(ReadOnly);
- HANDLE_ATTR(InlineHint);
- HANDLE_ATTR(NoInline);
- HANDLE_ATTR(AlwaysInline);
- HANDLE_ATTR(OptimizeForSize);
- HANDLE_ATTR(StackProtect);
- HANDLE_ATTR(StackProtectReq);
- HANDLE_ATTR(NoCapture);
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(InlineHint);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
#undef HANDLE_ATTR
- assert(attrs == 0 && "Unhandled attribute!");
- Out << ";";
- nl(Out);
- Out << "Attrs.push_back(PAWI);";
- nl(Out);
- }
- Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
nl(Out);
- out(); nl(Out);
- Out << '}'; nl(Out);
}
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
}
+}
- bool CppWriter::printTypeInternal(const Type* Ty) {
- // We don't print definitions for primitive types
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return false;
-
- // If we already defined this type, we don't need to define it again.
- if (DefinedTypes.find(Ty) != DefinedTypes.end())
- return false;
-
- // Everything below needs the name for the type so get it now.
- std::string typeName(getCppName(Ty));
-
- // Search the type stack for recursion. If we find it, then generate this
- // as an OpaqueType, but make sure not to do this multiple times because
- // the type could appear in multiple places on the stack. Once the opaque
- // definition is issued, it must not be re-issued. Consequently we have to
- // check the UnresolvedTypes list as well.
- TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
- Ty);
- if (TI != TypeStack.end()) {
- TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
- if (I == UnresolvedTypes.end()) {
- Out << "PATypeHolder " << typeName;
- Out << "_fwd = OpaqueType::get(mod->getContext());";
- nl(Out);
- UnresolvedTypes[Ty] = typeName;
- }
- return true;
- }
+bool CppWriter::printTypeInternal(const Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return false;
- // We're going to print a derived type which, by definition, contains other
- // types. So, push this one we're printing onto the type stack to assist with
- // recursive definitions.
- TypeStack.push_back(Ty);
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return false;
- // Print the type definition
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- const FunctionType* FT = cast<FunctionType>(Ty);
- Out << "std::vector<const Type*>" << typeName << "_args;";
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Search the type stack for recursion. If we find it, then generate this
+ // as an OpaqueType, but make sure not to do this multiple times because
+ // the type could appear in multiple places on the stack. Once the opaque
+ // definition is issued, it must not be re-issued. Consequently we have to
+ // check the UnresolvedTypes list as well.
+ TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
+ Ty);
+ if (TI != TypeStack.end()) {
+ TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
+ if (I == UnresolvedTypes.end()) {
+ Out << "PATypeHolder " << typeName;
+ Out << "_fwd = OpaqueType::get(mod->getContext());";
nl(Out);
- FunctionType::param_iterator PI = FT->param_begin();
- FunctionType::param_iterator PE = FT->param_end();
- for (; PI != PE; ++PI) {
- const Type* argTy = static_cast<const Type*>(*PI);
- bool isForward = printTypeInternal(argTy);
- std::string argName(getCppName(argTy));
- Out << typeName << "_args.push_back(" << argName;
- if (isForward)
- Out << "_fwd";
- Out << ");";
- nl(Out);
- }
- bool isForward = printTypeInternal(FT->getReturnType());
- std::string retTypeName(getCppName(FT->getReturnType()));
- Out << "FunctionType* " << typeName << " = FunctionType::get(";
- in(); nl(Out) << "/*Result=*/" << retTypeName;
+ UnresolvedTypes[Ty] = typeName;
+ }
+ return true;
+ }
+
+ // We're going to print a derived type which, by definition, contains other
+ // types. So, push this one we're printing onto the type stack to assist with
+ // recursive definitions.
+ TypeStack.push_back(Ty);
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ const Type* argTy = static_cast<const Type*>(*PI);
+ bool isForward = printTypeInternal(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
if (isForward)
Out << "_fwd";
- Out << ",";
- nl(Out) << "/*Params=*/" << typeName << "_args,";
- nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
- out();
- nl(Out);
- break;
- }
- case Type::StructTyID: {
- const StructType* ST = cast<StructType>(Ty);
- Out << "std::vector<const Type*>" << typeName << "_fields;";
- nl(Out);
- StructType::element_iterator EI = ST->element_begin();
- StructType::element_iterator EE = ST->element_end();
- for (; EI != EE; ++EI) {
- const Type* fieldTy = static_cast<const Type*>(*EI);
- bool isForward = printTypeInternal(fieldTy);
- std::string fieldName(getCppName(fieldTy));
- Out << typeName << "_fields.push_back(" << fieldName;
- if (isForward)
- Out << "_fwd";
- Out << ");";
- nl(Out);
- }
- Out << "StructType* " << typeName << " = StructType::get("
- << "mod->getContext(), "
- << typeName << "_fields, /*isPacked=*/"
- << (ST->isPacked() ? "true" : "false") << ");";
- nl(Out);
- break;
- }
- case Type::ArrayTyID: {
- const ArrayType* AT = cast<ArrayType>(Ty);
- const Type* ET = AT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "ArrayType* " << typeName << " = ArrayType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(AT->getNumElements()) << ");";
- nl(Out);
- break;
- }
- case Type::PointerTyID: {
- const PointerType* PT = cast<PointerType>(Ty);
- const Type* ET = PT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "PointerType* " << typeName << " = PointerType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(PT->getAddressSpace()) << ");";
- nl(Out);
- break;
- }
- case Type::VectorTyID: {
- const VectorType* PT = cast<VectorType>(Ty);
- const Type* ET = PT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "VectorType* " << typeName << " = VectorType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(PT->getNumElements()) << ");";
- nl(Out);
- break;
- }
- case Type::OpaqueTyID: {
- Out << "OpaqueType* " << typeName;
- Out << " = OpaqueType::get(mod->getContext());";
+ Out << ");";
nl(Out);
- break;
- }
- default:
- error("Invalid TypeID");
}
-
- // If the type had a name, make sure we recreate it.
- const std::string* progTypeName =
- findTypeName(TheModule->getTypeSymbolTable(),Ty);
- if (progTypeName) {
- Out << "mod->addTypeName(\"" << *progTypeName << "\", "
- << typeName << ");";
+ bool isForward = printTypeInternal(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType* ST = cast<StructType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ const Type* fieldTy = static_cast<const Type*>(*EI);
+ bool isForward = printTypeInternal(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ");";
nl(Out);
}
+ Out << "StructType* " << typeName << " = StructType::get("
+ << "mod->getContext(), "
+ << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType* AT = cast<ArrayType>(Ty);
+ const Type* ET = AT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType* PT = cast<PointerType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType* PT = cast<VectorType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::OpaqueTyID: {
+ Out << "OpaqueType* " << typeName;
+ Out << " = OpaqueType::get(mod->getContext());";
+ nl(Out);
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
- // Pop us off the type stack
- TypeStack.pop_back();
+ // If the type had a name, make sure we recreate it.
+ const std::string* progTypeName =
+ findTypeName(TheModule->getTypeSymbolTable(),Ty);
+ if (progTypeName) {
+ Out << "mod->addTypeName(\"" << *progTypeName << "\", "
+ << typeName << ");";
+ nl(Out);
+ }
- // Indicate that this type is now defined.
- DefinedTypes.insert(Ty);
+ // Pop us off the type stack
+ TypeStack.pop_back();
- // Early resolve as many unresolved types as possible. Search the unresolved
- // types map for the type we just printed. Now that its definition is complete
- // we can resolve any previous references to it. This prevents a cascade of
- // unresolved types.
- TypeMap::iterator I = UnresolvedTypes.find(Ty);
- if (I != UnresolvedTypes.end()) {
- Out << "cast<OpaqueType>(" << I->second
- << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
- nl(Out);
- Out << I->second << " = cast<";
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: Out << "FunctionType"; break;
- case Type::ArrayTyID: Out << "ArrayType"; break;
- case Type::StructTyID: Out << "StructType"; break;
- case Type::VectorTyID: Out << "VectorType"; break;
- case Type::PointerTyID: Out << "PointerType"; break;
- case Type::OpaqueTyID: Out << "OpaqueType"; break;
- default: Out << "NoSuchDerivedType"; break;
- }
- Out << ">(" << I->second << "_fwd.get());";
- nl(Out); nl(Out);
- UnresolvedTypes.erase(I);
- }
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
- // Finally, separate the type definition from other with a newline.
+ // Early resolve as many unresolved types as possible. Search the unresolved
+ // types map for the type we just printed. Now that its definition is complete
+ // we can resolve any previous references to it. This prevents a cascade of
+ // unresolved types.
+ TypeMap::iterator I = UnresolvedTypes.find(Ty);
+ if (I != UnresolvedTypes.end()) {
+ Out << "cast<OpaqueType>(" << I->second
+ << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
nl(Out);
-
- // We weren't a recursive type
- return false;
+ Out << I->second << " = cast<";
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: Out << "FunctionType"; break;
+ case Type::ArrayTyID: Out << "ArrayType"; break;
+ case Type::StructTyID: Out << "StructType"; break;
+ case Type::VectorTyID: Out << "VectorType"; break;
+ case Type::PointerTyID: Out << "PointerType"; break;
+ case Type::OpaqueTyID: Out << "OpaqueType"; break;
+ default: Out << "NoSuchDerivedType"; break;
+ }
+ Out << ">(" << I->second << "_fwd.get());";
+ nl(Out); nl(Out);
+ UnresolvedTypes.erase(I);
}
- // Prints a type definition. Returns true if it could not resolve all the
- // types in the definition but had to use a forward reference.
- void CppWriter::printType(const Type* Ty) {
- assert(TypeStack.empty());
- TypeStack.clear();
- printTypeInternal(Ty);
- assert(TypeStack.empty());
- }
-
- void CppWriter::printTypes(const Module* M) {
- // Walk the symbol table and print out all its types
- const TypeSymbolTable& symtab = M->getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
- TI != TE; ++TI) {
-
- // For primitive types and types already defined, just add a name
- TypeMap::const_iterator TNI = TypeNames.find(TI->second);
- if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
- TNI != TypeNames.end()) {
- Out << "mod->addTypeName(\"";
- printEscapedString(TI->first);
- Out << "\", " << getCppName(TI->second) << ");";
- nl(Out);
- // For everything else, define the type
- } else {
- printType(TI->second);
- }
- }
+ // Finally, separate the type definition from the others with a newline.
+ nl(Out);
- // Add all of the global variables to the value table...
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- if (I->hasInitializer())
- printType(I->getInitializer()->getType());
- printType(I->getType());
+ // We weren't a recursive type
+ return false;
+}
+
+// Prints a type definition via printTypeInternal, which returns true if it
+// could not resolve all the types in the definition and had to fall back on a
+// forward reference.
+void CppWriter::printType(const Type* Ty) {
+ assert(TypeStack.empty());
+ TypeStack.clear();
+ printTypeInternal(Ty);
+ assert(TypeStack.empty());
+}
+
+void CppWriter::printTypes(const Module* M) {
+ // Walk the symbol table and print out all its types
+ const TypeSymbolTable& symtab = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
+ TI != TE; ++TI) {
+
+ // For primitive types and types already defined, just add a name
+ TypeMap::const_iterator TNI = TypeNames.find(TI->second);
+ if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
+ TNI != TypeNames.end()) {
+ Out << "mod->addTypeName(\"";
+ printEscapedString(TI->first);
+ Out << "\", " << getCppName(TI->second) << ");";
+ nl(Out);
+ // For everything else, define the type
+ } else {
+ printType(TI->second);
}
+ }
- // Add all the functions to the table
- for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
- printType(FI->getReturnType());
- printType(FI->getFunctionType());
- // Add all the function arguments
- for (Function::const_arg_iterator AI = FI->arg_begin(),
- AE = FI->arg_end(); AI != AE; ++AI) {
- printType(AI->getType());
- }
+ // Add all of the global variables to the value table...
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
- // Add all of the basic blocks and instructions
- for (Function::const_iterator BB = FI->begin(),
- E = FI->end(); BB != E; ++BB) {
- printType(BB->getType());
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
- ++I) {
- printType(I->getType());
- for (unsigned i = 0; i < I->getNumOperands(); ++i)
- printType(I->getOperand(i)->getType());
- }
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
}
}
}
+}
- // printConstant - Print out a constant pool entry...
- void CppWriter::printConstant(const Constant *CV) {
- // First, if the constant is actually a GlobalValue (variable or function)
- // or its already in the constant list then we've printed it already and we
- // can just return.
- if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
- return;
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or it's already in the constant list then we've printed it already and we
+ // can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
- std::string constName(getCppName(CV));
- std::string typeName(getCppName(CV->getType()));
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
- if (isa<GlobalValue>(CV)) {
- // Skip variables and functions, we emit them elsewhere
- return;
- }
+ if (isa<GlobalValue>(CV)) {
+ // Skip variables and functions, we emit them elsewhere
+ return;
+ }
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- std::string constValue = CI->getValue().toString(10, true);
- Out << "ConstantInt* " << constName
- << " = ConstantInt::get(mod->getContext(), APInt("
- << cast<IntegerType>(CI->getType())->getBitWidth()
- << ", StringRef(\"" << constValue << "\"), 10));";
- } else if (isa<ConstantAggregateZero>(CV)) {
- Out << "ConstantAggregateZero* " << constName
- << " = ConstantAggregateZero::get(" << typeName << ");";
- } else if (isa<ConstantPointerNull>(CV)) {
- Out << "ConstantPointerNull* " << constName
- << " = ConstantPointerNull::get(" << typeName << ");";
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- Out << "ConstantFP* " << constName << " = ";
- printCFP(CFP);
- Out << ";";
- } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
- if (CA->isString() &&
- CA->getType()->getElementType() ==
- Type::getInt8Ty(CA->getContext())) {
- Out << "Constant* " << constName <<
- " = ConstantArray::get(mod->getContext(), \"";
- std::string tmp = CA->getAsString();
- bool nullTerminate = false;
- if (tmp[tmp.length()-1] == 0) {
- tmp.erase(tmp.length()-1);
- nullTerminate = true;
- }
- printEscapedString(tmp);
- // Determine if we want null termination or not.
- if (nullTerminate)
- Out << "\", true"; // Indicate that the null terminator should be
- // added.
- else
- Out << "\", false";// No null terminator
- Out << ");";
- } else {
- Out << "std::vector<Constant*> " << constName << "_elems;";
- nl(Out);
- unsigned N = CA->getNumOperands();
- for (unsigned i = 0; i < N; ++i) {
- printConstant(CA->getOperand(i)); // recurse to print operands
- Out << constName << "_elems.push_back("
- << getCppName(CA->getOperand(i)) << ");";
- nl(Out);
- }
- Out << "Constant* " << constName << " = ConstantArray::get("
- << typeName << ", " << constName << "_elems);";
- }
- } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
- Out << "std::vector<Constant*> " << constName << "_fields;";
- nl(Out);
- unsigned N = CS->getNumOperands();
- for (unsigned i = 0; i < N; i++) {
- printConstant(CS->getOperand(i));
- Out << constName << "_fields.push_back("
- << getCppName(CS->getOperand(i)) << ");";
- nl(Out);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(mod->getContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() &&
+ CA->getType()->getElementType() ==
+ Type::getInt8Ty(CA->getContext())) {
+ Out << "Constant* " << constName <<
+ " = ConstantArray::get(mod->getContext(), \"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
}
- Out << "Constant* " << constName << " = ConstantStruct::get("
- << typeName << ", " << constName << "_fields);";
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true"; // Indicate that the null terminator should be
+ // added.
+ else
+ Out << "\", false";// No null terminator
+ Out << ");";
+ } else {
Out << "std::vector<Constant*> " << constName << "_elems;";
nl(Out);
- unsigned N = CP->getNumOperands();
+ unsigned N = CA->getNumOperands();
for (unsigned i = 0; i < N; ++i) {
- printConstant(CP->getOperand(i));
+ printConstant(CA->getOperand(i)); // recurse to print operands
Out << constName << "_elems.push_back("
- << getCppName(CP->getOperand(i)) << ");";
+ << getCppName(CA->getOperand(i)) << ");";
nl(Out);
}
- Out << "Constant* " << constName << " = ConstantVector::get("
+ Out << "Constant* " << constName << " = ConstantArray::get("
<< typeName << ", " << constName << "_elems);";
- } else if (isa<UndefValue>(CV)) {
- Out << "UndefValue* " << constName << " = UndefValue::get("
- << typeName << ");";
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- Out << "std::vector<Constant*> " << constName << "_indices;";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
nl(Out);
- printConstant(CE->getOperand(0));
- for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
- printConstant(CE->getOperand(i));
- Out << constName << "_indices.push_back("
- << getCppName(CE->getOperand(i)) << ");";
- nl(Out);
- }
- Out << "Constant* " << constName
- << " = ConstantExpr::getGetElementPtr("
- << getCppName(CE->getOperand(0)) << ", "
- << "&" << constName << "_indices[0], "
- << constName << "_indices.size()"
- << ");";
- } else if (CE->isCast()) {
- printConstant(CE->getOperand(0));
- Out << "Constant* " << constName << " = ConstantExpr::getCast(";
- switch (CE->getOpcode()) {
- default: llvm_unreachable("Invalid cast opcode");
- case Instruction::Trunc: Out << "Instruction::Trunc"; break;
- case Instruction::ZExt: Out << "Instruction::ZExt"; break;
- case Instruction::SExt: Out << "Instruction::SExt"; break;
- case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
- case Instruction::FPExt: Out << "Instruction::FPExt"; break;
- case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
- case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
- case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
- case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
- case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
- case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
- case Instruction::BitCast: Out << "Instruction::BitCast"; break;
- }
- Out << ", " << getCppName(CE->getOperand(0)) << ", "
- << getCppName(CE->getType()) << ");";
- } else {
- unsigned N = CE->getNumOperands();
- for (unsigned i = 0; i < N; ++i ) {
- printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << "&" << constName << "_indices[0], "
+ << constName << "_indices.size()"
+ << ");";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
}
- Out << "Constant* " << constName << " = ConstantExpr::";
- switch (CE->getOpcode()) {
- case Instruction::Add: Out << "getAdd("; break;
- case Instruction::FAdd: Out << "getFAdd("; break;
- case Instruction::Sub: Out << "getSub("; break;
- case Instruction::FSub: Out << "getFSub("; break;
- case Instruction::Mul: Out << "getMul("; break;
- case Instruction::FMul: Out << "getFMul("; break;
- case Instruction::UDiv: Out << "getUDiv("; break;
- case Instruction::SDiv: Out << "getSDiv("; break;
- case Instruction::FDiv: Out << "getFDiv("; break;
- case Instruction::URem: Out << "getURem("; break;
- case Instruction::SRem: Out << "getSRem("; break;
- case Instruction::FRem: Out << "getFRem("; break;
- case Instruction::And: Out << "getAnd("; break;
- case Instruction::Or: Out << "getOr("; break;
- case Instruction::Xor: Out << "getXor("; break;
- case Instruction::ICmp:
- Out << "getICmp(ICmpInst::ICMP_";
- switch (CE->getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << "EQ"; break;
- case ICmpInst::ICMP_NE: Out << "NE"; break;
- case ICmpInst::ICMP_SLT: Out << "SLT"; break;
- case ICmpInst::ICMP_ULT: Out << "ULT"; break;
- case ICmpInst::ICMP_SGT: Out << "SGT"; break;
- case ICmpInst::ICMP_UGT: Out << "UGT"; break;
- case ICmpInst::ICMP_SLE: Out << "SLE"; break;
- case ICmpInst::ICMP_ULE: Out << "ULE"; break;
- case ICmpInst::ICMP_SGE: Out << "SGE"; break;
- case ICmpInst::ICMP_UGE: Out << "UGE"; break;
- default: error("Invalid ICmp Predicate");
- }
- break;
- case Instruction::FCmp:
- Out << "getFCmp(FCmpInst::FCMP_";
- switch (CE->getPredicate()) {
- case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
- case FCmpInst::FCMP_ORD: Out << "ORD"; break;
- case FCmpInst::FCMP_UNO: Out << "UNO"; break;
- case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
- case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
- case FCmpInst::FCMP_ONE: Out << "ONE"; break;
- case FCmpInst::FCMP_UNE: Out << "UNE"; break;
- case FCmpInst::FCMP_OLT: Out << "OLT"; break;
- case FCmpInst::FCMP_ULT: Out << "ULT"; break;
- case FCmpInst::FCMP_OGT: Out << "OGT"; break;
- case FCmpInst::FCMP_UGT: Out << "UGT"; break;
- case FCmpInst::FCMP_OLE: Out << "OLE"; break;
- case FCmpInst::FCMP_ULE: Out << "ULE"; break;
- case FCmpInst::FCMP_OGE: Out << "OGE"; break;
- case FCmpInst::FCMP_UGE: Out << "UGE"; break;
- case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
- default: error("Invalid FCmp Predicate");
- }
- break;
- case Instruction::Shl: Out << "getShl("; break;
- case Instruction::LShr: Out << "getLShr("; break;
- case Instruction::AShr: Out << "getAShr("; break;
- case Instruction::Select: Out << "getSelect("; break;
- case Instruction::ExtractElement: Out << "getExtractElement("; break;
- case Instruction::InsertElement: Out << "getInsertElement("; break;
- case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
- default:
- error("Invalid constant expression");
- break;
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
}
- Out << getCppName(CE->getOperand(0));
- for (unsigned i = 1; i < CE->getNumOperands(); ++i)
- Out << ", " << getCppName(CE->getOperand(i));
- Out << ");";
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
}
- } else {
- error("Bad Constant");
- Out << "Constant* " << constName << " = 0; ";
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
}
- nl(Out);
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "Constant* " << constName << " = ";
+ Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
}
+ nl(Out);
+}
- void CppWriter::printConstants(const Module* M) {
- // Traverse all the global variables looking for constant initializers
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I)
- if (I->hasInitializer())
- printConstant(I->getInitializer());
-
- // Traverse the LLVM functions looking for constants
- for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
- // Add all of the basic blocks and instructions
- for (Function::const_iterator BB = FI->begin(),
- E = FI->end(); BB != E; ++BB) {
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
- ++I) {
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
- printConstant(C);
- }
+void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
}
}
}
}
}
+}
- void CppWriter::printVariableUses(const GlobalVariable *GV) {
- nl(Out) << "// Type Definitions";
- nl(Out);
- printType(GV->getType());
- if (GV->hasInitializer()) {
- Constant *Init = GV->getInitializer();
- printType(Init->getType());
- if (Function *F = dyn_cast<Function>(Init)) {
- nl(Out)<< "/ Function Declarations"; nl(Out);
- printFunctionHead(F);
- } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
- nl(Out) << "// Global Variable Declarations"; nl(Out);
- printVariableHead(gv);
-
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- printVariableBody(gv);
- } else {
- nl(Out) << "// Constant Definitions"; nl(Out);
- printConstant(Init);
- }
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ Constant *Init = GV->getInitializer();
+ printType(Init->getType());
+ if (Function *F = dyn_cast<Function>(Init)) {
+ nl(Out)<< "/ Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
}
}
+}
- void CppWriter::printVariableHead(const GlobalVariable *GV) {
- nl(Out) << "GlobalVariable* " << getCppName(GV);
- if (is_inline) {
- Out << " = mod->getGlobalVariable(mod->getContext(), ";
- printEscapedString(GV->getName());
- Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
- nl(Out) << "if (!" << getCppName(GV) << ") {";
- in(); nl(Out) << getCppName(GV);
- }
- Out << " = new GlobalVariable(/*Module=*/*mod, ";
- nl(Out) << "/*Type=*/";
- printCppName(GV->getType()->getElementType());
- Out << ",";
- nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
- Out << ",";
- nl(Out) << "/*Linkage=*/";
- printLinkageType(GV->getLinkage());
- Out << ",";
- nl(Out) << "/*Initializer=*/0, ";
- if (GV->hasInitializer()) {
- Out << "// has initializer, specified below";
- }
- nl(Out) << "/*Name=*/\"";
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\");";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
Out << "\");";
nl(Out);
-
- if (GV->hasSection()) {
- printCppName(GV);
- Out << "->setSection(\"";
- printEscapedString(GV->getSection());
- Out << "\");";
- nl(Out);
- }
- if (GV->getAlignment()) {
- printCppName(GV);
- Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
- nl(Out);
- }
- if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
- printCppName(GV);
- Out << "->setVisibility(";
- printVisibilityType(GV->getVisibility());
- Out << ");";
- nl(Out);
- }
- if (GV->isThreadLocal()) {
- printCppName(GV);
- Out << "->setThreadLocal(true);";
- nl(Out);
- }
- if (is_inline) {
- out(); Out << "}"; nl(Out);
- }
}
-
- void CppWriter::printVariableBody(const GlobalVariable *GV) {
- if (GV->hasInitializer()) {
- printCppName(GV);
- Out << "->setInitializer(";
- Out << getCppName(GV->getInitializer()) << ");";
- nl(Out);
- }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
}
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+}
- std::string CppWriter::getOpName(Value* V) {
- if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
- return getCppName(V);
-
- // See if its alread in the map of forward references, if so just return the
- // name we already set up for it
- ForwardRefMap::const_iterator I = ForwardRefs.find(V);
- if (I != ForwardRefs.end())
- return I->second;
-
- // This is a new forward reference. Generate a unique name for it
- std::string result(std::string("fwdref_") + utostr(uniqueNum++));
-
- // Yes, this is a hack. An Argument is the smallest instantiable value that
- // we can make as a placeholder for the real value. We'll replace these
- // Argument instances later.
- Out << "Argument* " << result << " = new Argument("
- << getCppName(V->getType()) << ");";
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
nl(Out);
- ForwardRefs[V] = result;
- return result;
}
+}
- // printInstruction - This member is called for each Instruction in a function.
- void CppWriter::printInstruction(const Instruction *I,
- const std::string& bbname) {
- std::string iName(getCppName(I));
+std::string CppWriter::getOpName(Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
- // Before we emit this instruction, we need to take care of generating any
- // forward references. So, we get the names of all the operands in advance
- const unsigned Ops(I->getNumOperands());
- std::string* opNames = new std::string[Ops];
- for (unsigned i = 0; i < Ops; i++) {
- opNames[i] = getOpName(I->getOperand(i));
- }
+  // See if it's already in the map of forward references; if so, just return
+  // the name we already set up for it.
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
- switch (I->getOpcode()) {
- default:
- error("Invalid instruction");
- break;
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
- case Instruction::Ret: {
- const ReturnInst* ret = cast<ReturnInst>(I);
- Out << "ReturnInst::Create(mod->getContext(), "
- << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
- break;
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
}
- case Instruction::Br: {
- const BranchInst* br = cast<BranchInst>(I);
- Out << "BranchInst::Create(" ;
- if (br->getNumOperands() == 3 ) {
- Out << opNames[2] << ", "
- << opNames[1] << ", "
- << opNames[0] << ", ";
-
- } else if (br->getNumOperands() == 1) {
- Out << opNames[0] << ", ";
- } else {
- error("Branch with 2 operands?");
- }
- Out << bbname << ");";
- break;
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst *SI = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << SI->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
+ Out << iName << "->addCase("
+ << opNames[i] << ", "
+ << opNames[i+1] << ");";
+ nl(Out);
}
- case Instruction::Switch: {
- const SwitchInst *SI = cast<SwitchInst>(I);
- Out << "SwitchInst* " << iName << " = SwitchInst::Create("
- << opNames[0] << ", "
- << opNames[1] << ", "
- << SI->getNumCases() << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
nl(Out);
- for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
- Out << iName << "->addCase("
- << opNames[i] << ", "
- << opNames[i+1] << ");";
- nl(Out);
- }
- break;
}
- case Instruction::IndirectBr: {
- const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
- Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
- << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << getOpName(inv->getArgOperand(i)) << ");";
nl(Out);
- for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
- Out << iName << "->addDestination(" << opNames[i] << ");";
- nl(Out);
- }
- break;
}
- case Instruction::Invoke: {
- const InvokeInst* inv = cast<InvokeInst>(I);
- Out << "std::vector<Value*> " << iName << "_params;";
+ // FIXME: This shouldn't use magic numbers -3, -2, and -1.
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getNormalDest()) << ", "
+ << getOpName(inv->getUnwindDest()) << ", "
+ << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable: {
+ Out << "new UnreachableInst("
+ << "mod->getContext(), "
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load: {
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
nl(Out);
- for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) {
- Out << iName << "_params.push_back("
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
<< opNames[i] << ");";
nl(Out);
}
- Out << "InvokeInst *" << iName << " = InvokeInst::Create("
- << opNames[Ops - 3] << ", "
- << opNames[Ops - 2] << ", "
- << opNames[Ops - 1] << ", "
- << iName << "_params.begin(), " << iName << "_params.end(), \"";
- printEscapedString(inv->getName());
- Out << "\", " << bbname << ");";
- nl(Out) << iName << "->setCallingConv(";
- printCallingConv(inv->getCallingConv());
- Out << ");";
- printAttributes(inv->getAttributes(), iName);
- Out << iName << "->setAttributes(" << iName << "_PAL);";
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices.begin(), "
+ << iName << "_indices.end()";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->reserveOperandSpace("
+ << phi->getNumIncomingValues()
+ << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+ Out << iName << "->addIncoming("
+ << opNames[i] << ", " << opNames[i+1] << ");";
nl(Out);
- break;
- }
- case Instruction::Unwind: {
- Out << "new UnwindInst("
- << bbname << ");";
- break;
- }
- case Instruction::Unreachable: {
- Out << "new UnreachableInst("
- << "mod->getContext(), "
- << bbname << ");";
- break;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:{
- Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
- switch (I->getOpcode()) {
- case Instruction::Add: Out << "Instruction::Add"; break;
- case Instruction::FAdd: Out << "Instruction::FAdd"; break;
- case Instruction::Sub: Out << "Instruction::Sub"; break;
- case Instruction::FSub: Out << "Instruction::FSub"; break;
- case Instruction::Mul: Out << "Instruction::Mul"; break;
- case Instruction::FMul: Out << "Instruction::FMul"; break;
- case Instruction::UDiv:Out << "Instruction::UDiv"; break;
- case Instruction::SDiv:Out << "Instruction::SDiv"; break;
- case Instruction::FDiv:Out << "Instruction::FDiv"; break;
- case Instruction::URem:Out << "Instruction::URem"; break;
- case Instruction::SRem:Out << "Instruction::SRem"; break;
- case Instruction::FRem:Out << "Instruction::FRem"; break;
- case Instruction::And: Out << "Instruction::And"; break;
- case Instruction::Or: Out << "Instruction::Or"; break;
- case Instruction::Xor: Out << "Instruction::Xor"; break;
- case Instruction::Shl: Out << "Instruction::Shl"; break;
- case Instruction::LShr:Out << "Instruction::LShr"; break;
- case Instruction::AShr:Out << "Instruction::AShr"; break;
- default: Out << "Instruction::BadOpCode"; break;
- }
- Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
- printEscapedString(I->getName());
- Out << "\", " << bbname << ");";
- break;
}
- case Instruction::FCmp: {
- Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
- switch (cast<FCmpInst>(I)->getPredicate()) {
- case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
- case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
- case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
- case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
- case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
- case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
- case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
- case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
- case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
- case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
- case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
- case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
- case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
- case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
- case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
- case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
- default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
- }
- Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
- printEscapedString(I->getName());
- Out << "\");";
- break;
- }
- case Instruction::ICmp: {
- Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
- switch (cast<ICmpInst>(I)->getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
- case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
- case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
- case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
- case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
- case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
- case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
- case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
- case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
- case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
- default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
- }
- Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
- printEscapedString(I->getName());
- Out << "\");";
- break;
- }
- case Instruction::Alloca: {
- const AllocaInst* allocaI = cast<AllocaInst>(I);
- Out << "AllocaInst* " << iName << " = new AllocaInst("
- << getCppName(allocaI->getAllocatedType()) << ", ";
- if (allocaI->isArrayAllocation())
- Out << opNames[0] << ", ";
- Out << "\"";
- printEscapedString(allocaI->getName());
- Out << "\", " << bbname << ");";
- if (allocaI->getAlignment())
- nl(Out) << iName << "->setAlignment("
- << allocaI->getAlignment() << ");";
- break;
- }
- case Instruction::Load:{
- const LoadInst* load = cast<LoadInst>(I);
- Out << "LoadInst* " << iName << " = new LoadInst("
- << opNames[0] << ", \"";
- printEscapedString(load->getName());
- Out << "\", " << (load->isVolatile() ? "true" : "false" )
- << ", " << bbname << ");";
- break;
- }
- case Instruction::Store: {
- const StoreInst* store = cast<StoreInst>(I);
- Out << " new StoreInst("
- << opNames[0] << ", "
- << opNames[1] << ", "
- << (store->isVolatile() ? "true" : "false")
- << ", " << bbname << ");";
- break;
- }
- case Instruction::GetElementPtr: {
- const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
- if (gep->getNumOperands() <= 2) {
- Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
- << opNames[0];
- if (gep->getNumOperands() == 2)
- Out << ", " << opNames[1];
- } else {
- Out << "std::vector<Value*> " << iName << "_indices;";
- nl(Out);
- for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
- Out << iName << "_indices.push_back("
- << opNames[i] << ");";
- nl(Out);
- }
- Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
- << opNames[0] << ", " << iName << "_indices.begin(), "
- << iName << "_indices.end()";
- }
- Out << ", \"";
- printEscapedString(gep->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::PHI: {
- const PHINode* phi = cast<PHINode>(I);
-
- Out << "PHINode* " << iName << " = PHINode::Create("
- << getCppName(phi->getType()) << ", \"";
- printEscapedString(phi->getName());
- Out << "\", " << bbname << ");";
- nl(Out) << iName << "->reserveOperandSpace("
- << phi->getNumIncomingValues()
- << ");";
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(!"Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call: {
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
nl(Out);
- for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
- Out << iName << "->addIncoming("
- << opNames[i] << ", " << opNames[i+1] << ");";
- nl(Out);
- }
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast: {
- const CastInst* cst = cast<CastInst>(I);
- Out << "CastInst* " << iName << " = new ";
- switch (I->getOpcode()) {
- case Instruction::Trunc: Out << "TruncInst"; break;
- case Instruction::ZExt: Out << "ZExtInst"; break;
- case Instruction::SExt: Out << "SExtInst"; break;
- case Instruction::FPTrunc: Out << "FPTruncInst"; break;
- case Instruction::FPExt: Out << "FPExtInst"; break;
- case Instruction::FPToUI: Out << "FPToUIInst"; break;
- case Instruction::FPToSI: Out << "FPToSIInst"; break;
- case Instruction::UIToFP: Out << "UIToFPInst"; break;
- case Instruction::SIToFP: Out << "SIToFPInst"; break;
- case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
- case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
- case Instruction::BitCast: Out << "BitCastInst"; break;
- default: assert(!"Unreachable"); break;
- }
- Out << "(" << opNames[0] << ", "
- << getCppName(cst->getType()) << ", \"";
- printEscapedString(cst->getName());
- Out << "\", " << bbname << ");";
- break;
}
- case Instruction::Call:{
- const CallInst* call = cast<CallInst>(I);
- if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
- Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
- << getCppName(ila->getFunctionType()) << ", \""
- << ila->getAsmString() << "\", \""
- << ila->getConstraintString() << "\","
- << (ila->hasSideEffects() ? "true" : "false") << ");";
- nl(Out);
- }
- if (call->getNumOperands() > 2) {
- Out << "std::vector<Value*> " << iName << "_params;";
+ if (call->getNumArgOperands() > 1) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
nl(Out);
- for (unsigned i = 1; i < call->getNumOperands(); ++i) {
- Out << iName << "_params.push_back(" << opNames[i] << ");";
- nl(Out);
- }
- Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[0] << ", " << iName << "_params.begin(), "
- << iName << "_params.end(), \"";
- } else if (call->getNumOperands() == 2) {
- Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[0] << ", " << opNames[1] << ", \"";
- } else {
- Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0]
- << ", \"";
}
- printEscapedString(call->getName());
- Out << "\", " << bbname << ");";
- nl(Out) << iName << "->setCallingConv(";
- printCallingConv(call->getCallingConv());
- Out << ");";
- nl(Out) << iName << "->setTailCall("
- << (call->isTailCall() ? "true":"false");
- Out << ");";
- printAttributes(call->getAttributes(), iName);
- Out << iName << "->setAttributes(" << iName << "_PAL);";
- nl(Out);
- break;
- }
- case Instruction::Select: {
- const SelectInst* sel = cast<SelectInst>(I);
- Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
- Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
- printEscapedString(sel->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::UserOp1:
- /// FALL THROUGH
- case Instruction::UserOp2: {
- /// FIXME: What should be done here?
- break;
- }
- case Instruction::VAArg: {
- const VAArgInst* va = cast<VAArgInst>(I);
- Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
- << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
- printEscapedString(va->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::ExtractElement: {
- const ExtractElementInst* eei = cast<ExtractElementInst>(I);
- Out << "ExtractElementInst* " << getCppName(eei)
- << " = new ExtractElementInst(" << opNames[0]
- << ", " << opNames[1] << ", \"";
- printEscapedString(eei->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::InsertElement: {
- const InsertElementInst* iei = cast<InsertElementInst>(I);
- Out << "InsertElementInst* " << getCppName(iei)
- << " = InsertElementInst::Create(" << opNames[0]
- << ", " << opNames[1] << ", " << opNames[2] << ", \"";
- printEscapedString(iei->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::ShuffleVector: {
- const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
- Out << "ShuffleVectorInst* " << getCppName(svi)
- << " = new ShuffleVectorInst(" << opNames[0]
- << ", " << opNames[1] << ", " << opNames[2] << ", \"";
- printEscapedString(svi->getName());
- Out << "\", " << bbname << ");";
- break;
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ } else if (call->getNumArgOperands() == 1) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", \"";
}
- case Instruction::ExtractValue: {
- const ExtractValueInst *evi = cast<ExtractValueInst>(I);
- Out << "std::vector<unsigned> " << iName << "_indices;";
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true" : "false");
+ Out << ");";
+ nl(Out);
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
nl(Out);
- for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
- Out << iName << "_indices.push_back("
- << evi->idx_begin()[i] << ");";
- nl(Out);
- }
- Out << "ExtractValueInst* " << getCppName(evi)
- << " = ExtractValueInst::Create(" << opNames[0]
- << ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
- printEscapedString(evi->getName());
- Out << "\", " << bbname << ");";
- break;
}
- case Instruction::InsertValue: {
- const InsertValueInst *ivi = cast<InsertValueInst>(I);
- Out << "std::vector<unsigned> " << iName << "_indices;";
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
nl(Out);
- for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
- Out << iName << "_indices.push_back("
- << ivi->idx_begin()[i] << ");";
- nl(Out);
- }
- Out << "InsertValueInst* " << getCppName(ivi)
- << " = InsertValueInst::Create(" << opNames[0]
- << ", " << opNames[1] << ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
- printEscapedString(ivi->getName());
- Out << "\", " << bbname << ");";
- break;
}
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
}
DefinedValues.insert(I);
nl(Out);
delete [] opNames;
}
- // Print out the types, constants and declarations needed by one function
- void CppWriter::printFunctionUses(const Function* F) {
- nl(Out) << "// Type Definitions"; nl(Out);
- if (!is_inline) {
- // Print the function's return type
- printType(F->getReturnType());
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
- // Print the function's function type
- printType(F->getFunctionType());
+ // Print the function's function type
+ printType(F->getFunctionType());
- // Print the types of each of the function's arguments
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- printType(AI->getType());
- }
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
}
+ }
- // Print type definitions for every type referenced by an instruction and
- // make a note of any global values or constants that are referenced
- SmallPtrSet<GlobalValue*,64> gvs;
- SmallPtrSet<Constant*,64> consts;
- for (Function::const_iterator BB = F->begin(), BE = F->end();
- BB != BE; ++BB){
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- // Print the type of the instruction itself
- printType(I->getType());
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
- // Print the type of each of the instruction's operands
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value* operand = I->getOperand(i);
- printType(operand->getType());
-
- // If the operand references a GVal or Constant, make a note of it
- if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
- gvs.insert(GV);
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->hasInitializer())
- consts.insert(GVar->getInitializer());
- } else if (Constant* C = dyn_cast<Constant>(operand))
- consts.insert(C);
- }
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand))
+ consts.insert(C);
}
}
+ }
- // Print the function declarations for any functions encountered
- nl(Out) << "// Function Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (Function* Fun = dyn_cast<Function>(*I)) {
- if (!is_inline || Fun != F)
- printFunctionHead(Fun);
- }
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
}
+ }
- // Print the global variable declarations for any variables encountered
- nl(Out) << "// Global Variable Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
- printVariableHead(F);
- }
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
- // Print the constants found
- nl(Out) << "// Constant Definitions"; nl(Out);
- for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
- E = consts.end(); I != E; ++I) {
- printConstant(*I);
- }
+  // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
- // Process the global variables definitions now that all the constants have
- // been emitted. These definitions just couple the gvars with their constant
- // initializers.
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
- printVariableBody(GV);
- }
+  // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
}
+}
- void CppWriter::printFunctionHead(const Function* F) {
- nl(Out) << "Function* " << getCppName(F);
- if (is_inline) {
- Out << " = mod->getFunction(\"";
- printEscapedString(F->getName());
- Out << "\", " << getCppName(F->getFunctionType()) << ");";
- nl(Out) << "if (!" << getCppName(F) << ") {";
- nl(Out) << getCppName(F);
- }
- Out<< " = Function::Create(";
- nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
- nl(Out) << "/*Linkage=*/";
- printLinkageType(F->getLinkage());
- Out << ",";
- nl(Out) << "/*Name=*/\"";
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
printEscapedString(F->getName());
- Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
- nl(Out,-1);
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
printCppName(F);
- Out << "->setCallingConv(";
- printCallingConv(F->getCallingConv());
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
Out << ");";
nl(Out);
- if (F->hasSection()) {
- printCppName(F);
- Out << "->setSection(\"" << F->getSection() << "\");";
- nl(Out);
- }
- if (F->getAlignment()) {
- printCppName(F);
- Out << "->setAlignment(" << F->getAlignment() << ");";
- nl(Out);
- }
- if (F->getVisibility() != GlobalValue::DefaultVisibility) {
- printCppName(F);
- Out << "->setVisibility(";
- printVisibilityType(F->getVisibility());
- Out << ");";
- nl(Out);
- }
- if (F->hasGC()) {
- printCppName(F);
- Out << "->setGC(\"" << F->getGC() << "\");";
- nl(Out);
- }
- if (is_inline) {
- Out << "}";
- nl(Out);
- }
- printAttributes(F->getAttributes(), getCppName(F));
+ }
+ if (F->hasGC()) {
printCppName(F);
- Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ Out << "->setGC(\"" << F->getGC() << "\");";
nl(Out);
}
+ if (is_inline) {
+ Out << "}";
+ nl(Out);
+ }
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+}
- void CppWriter::printFunctionBody(const Function *F) {
- if (F->isDeclaration())
- return; // external functions have no bodies.
-
- // Clear the DefinedValues and ForwardRefs maps because we can't have
- // cross-function forward refs
- ForwardRefs.clear();
- DefinedValues.clear();
+void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
- // Create all the argument values
- if (!is_inline) {
- if (!F->arg_empty()) {
- Out << "Function::arg_iterator args = " << getCppName(F)
- << "->arg_begin();";
- nl(Out);
- }
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- Out << "Value* " << getCppName(AI) << " = args++;";
- nl(Out);
- if (AI->hasName()) {
- Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
- nl(Out);
- }
- }
- }
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
- // Create all the basic blocks
- nl(Out);
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
- Out << "BasicBlock* " << bbname <<
- " = BasicBlock::Create(mod->getContext(), \"";
- if (BI->hasName())
- printEscapedString(BI->getName());
- Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
nl(Out);
}
-
- // Output all of its basic blocks... for the function
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
- nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
nl(Out);
-
- // Output all of the instructions in the basic block...
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
- I != E; ++I) {
- printInstruction(I,bbname);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
}
}
+ }
- // Loop over the ForwardRefs and resolve them now that all instructions
- // are generated.
- if (!ForwardRefs.empty()) {
- nl(Out) << "// Resolve Forward References";
- nl(Out);
- }
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(mod->getContext(), \"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
- while (!ForwardRefs.empty()) {
- ForwardRefMap::iterator I = ForwardRefs.begin();
- Out << I->second << "->replaceAllUsesWith("
- << getCppName(I->first) << "); delete " << I->second << ";";
- nl(Out);
- ForwardRefs.erase(I);
+ // Output all of its basic blocks... for the function
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
}
}
- void CppWriter::printInline(const std::string& fname,
- const std::string& func) {
- const Function* F = TheModule->getFunction(func);
- if (!F) {
- error(std::string("Function '") + func + "' not found in input module");
- return;
- }
- if (F->isDeclaration()) {
- error(std::string("Function '") + func + "' is external!");
- return;
- }
- nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
- << getCppName(F);
- unsigned arg_count = 1;
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- Out << ", Value* arg_" << arg_count;
- }
- Out << ") {";
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
nl(Out);
- is_inline = true;
- printFunctionUses(F);
- printFunctionBody(F);
- is_inline = false;
- Out << "return " << getCppName(F->begin()) << ";";
- nl(Out) << "}";
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
nl(Out);
+ ForwardRefs.erase(I);
}
+}
- void CppWriter::printModuleBody() {
- // Print out all the type definitions
- nl(Out) << "// Type Definitions"; nl(Out);
- printTypes(TheModule);
-
- // Functions can call each other and global variables can reference them so
- // define all the functions first before emitting their function bodies.
- nl(Out) << "// Function Declarations"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I)
- printFunctionHead(I);
-
- // Process the global variables declarations. We can't initialze them until
- // after the constants are printed so just print a header for each global
- nl(Out) << "// Global Variable Declarations\n"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableHead(I);
- }
+void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << ", Value* arg_" << arg_count;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+}
- // Print out all the constants definitions. Constants don't recurse except
- // through GlobalValues. All GlobalValues have been declared at this point
- // so we can proceed to generate the constants.
- nl(Out) << "// Constant Definitions"; nl(Out);
- printConstants(TheModule);
-
- // Process the global variables definitions now that all the constants have
- // been emitted. These definitions just couple the gvars with their constant
- // initializers.
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableBody(I);
- }
+void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them, so
+ // define all the functions first before emitting their function bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variable declarations. We can't initialize them until
+ // after the constants are printed, so just print a header for each global.
+ nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
- // Finally, we can safely put out all of the function bodies.
- nl(Out) << "// Function Definitions"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I) {
- if (!I->isDeclaration()) {
- nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
- << ")";
- nl(Out) << "{";
- nl(Out,1);
- printFunctionBody(I);
- nl(Out,-1) << "}";
- nl(Out);
- }
- }
+ // Print out all the constant definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
}
- void CppWriter::printProgram(const std::string& fname,
- const std::string& mName) {
- Out << "#include <llvm/LLVMContext.h>\n";
- Out << "#include <llvm/Module.h>\n";
- Out << "#include <llvm/DerivedTypes.h>\n";
- Out << "#include <llvm/Constants.h>\n";
- Out << "#include <llvm/GlobalVariable.h>\n";
- Out << "#include <llvm/Function.h>\n";
- Out << "#include <llvm/CallingConv.h>\n";
- Out << "#include <llvm/BasicBlock.h>\n";
- Out << "#include <llvm/Instructions.h>\n";
- Out << "#include <llvm/InlineAsm.h>\n";
- Out << "#include <llvm/Support/FormattedStream.h>\n";
- Out << "#include <llvm/Support/MathExtras.h>\n";
- Out << "#include <llvm/Pass.h>\n";
- Out << "#include <llvm/PassManager.h>\n";
- Out << "#include <llvm/ADT/SmallVector.h>\n";
- Out << "#include <llvm/Analysis/Verifier.h>\n";
- Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
- Out << "#include <algorithm>\n";
- Out << "using namespace llvm;\n\n";
- Out << "Module* " << fname << "();\n\n";
- Out << "int main(int argc, char**argv) {\n";
- Out << " Module* Mod = " << fname << "();\n";
- Out << " verifyModule(*Mod, PrintMessageAction);\n";
- Out << " PassManager PM;\n";
- Out << " PM.add(createPrintModulePass(&outs()));\n";
- Out << " PM.run(*Mod);\n";
- Out << " return 0;\n";
- Out << "}\n\n";
- printModule(fname,mName);
- }
-
- void CppWriter::printModule(const std::string& fname,
- const std::string& mName) {
- nl(Out) << "Module* " << fname << "() {";
- nl(Out,1) << "// Module Construction";
- nl(Out) << "Module* mod = new Module(\"";
- printEscapedString(mName);
- Out << "\", getGlobalContext());";
- if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
- }
- if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
- << "\");";
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
}
+ }
+}
- if (!TheModule->getModuleInlineAsm().empty()) {
- nl(Out) << "mod->setModuleInlineAsm(\"";
- printEscapedString(TheModule->getModuleInlineAsm());
- Out << "\");";
- }
- nl(Out);
+void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/LLVMContext.h>\n";
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+}
- // Loop over the dependent libraries and emit them.
- Module::lib_iterator LI = TheModule->lib_begin();
- Module::lib_iterator LE = TheModule->lib_end();
- while (LI != LE) {
- Out << "mod->addLibrary(\"" << *LI << "\");";
- nl(Out);
- ++LI;
- }
- printModuleBody();
- nl(Out) << "return mod;";
- nl(Out,-1) << "}";
+void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"";
+ printEscapedString(mName);
+ Out << "\", getGlobalContext());";
+ if (!TheModule->getDataLayout().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
nl(Out);
+ ++LI;
}
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+}
+
+void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"";
+ printEscapedString(mName);
+ Out << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+}
- void CppWriter::printContents(const std::string& fname,
- const std::string& mName) {
- Out << "\nModule* " << fname << "(Module *mod) {\n";
- Out << "\nmod->setModuleIdentifier(\"";
- printEscapedString(mName);
- Out << "\");\n";
- printModuleBody();
- Out << "\nreturn mod;\n";
- Out << "\n}\n";
+void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
}
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+}
- void CppWriter::printFunction(const std::string& fname,
- const std::string& funcName) {
- const Function* F = TheModule->getFunction(funcName);
- if (!F) {
- error(std::string("Function '") + funcName + "' not found in input module");
- return;
- }
- Out << "\nFunction* " << fname << "(Module *mod) {\n";
- printFunctionUses(F);
- printFunctionHead(F);
- printFunctionBody(F);
- Out << "return " << getCppName(F) << ";\n";
- Out << "}\n";
- }
-
- void CppWriter::printFunctions() {
- const Module::FunctionListType &funcs = TheModule->getFunctionList();
- Module::const_iterator I = funcs.begin();
- Module::const_iterator IE = funcs.end();
-
- for (; I != IE; ++I) {
- const Function &func = *I;
- if (!func.isDeclaration()) {
- std::string name("define_");
- name += func.getName();
- printFunction(name, func.getName());
- }
+void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
}
}
+}
- void CppWriter::printVariable(const std::string& fname,
- const std::string& varName) {
- const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
- if (!GV) {
- error(std::string("Variable '") + varName + "' not found in input module");
- return;
- }
- Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
- printVariableUses(GV);
- printVariableHead(GV);
- printVariableBody(GV);
- Out << "return " << getCppName(GV) << ";\n";
- Out << "}\n";
- }
-
- void CppWriter::printType(const std::string& fname,
- const std::string& typeName) {
- const Type* Ty = TheModule->getTypeByName(typeName);
- if (!Ty) {
- error(std::string("Type '") + typeName + "' not found in input module");
- return;
- }
- Out << "\nType* " << fname << "(Module *mod) {\n";
- printType(Ty);
- Out << "return " << getCppName(Ty) << ";\n";
- Out << "}\n";
- }
-
- bool CppWriter::runOnModule(Module &M) {
- TheModule = &M;
-
- // Emit a header
- Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
-
- // Get the name of the function we're supposed to generate
- std::string fname = FuncName.getValue();
-
- // Get the name of the thing we are to generate
- std::string tgtname = NameToGenerate.getValue();
- if (GenerationType == GenModule ||
- GenerationType == GenContents ||
- GenerationType == GenProgram ||
- GenerationType == GenFunctions) {
- if (tgtname == "!bad!") {
- if (M.getModuleIdentifier() == "-")
- tgtname = "<stdin>";
- else
- tgtname = M.getModuleIdentifier();
- }
- } else if (tgtname == "!bad!")
- error("You must use the -for option with -gen-{function,variable,type}");
-
- switch (WhatToGenerate(GenerationType)) {
- case GenProgram:
- if (fname.empty())
- fname = "makeLLVMModule";
- printProgram(fname,tgtname);
- break;
- case GenModule:
- if (fname.empty())
- fname = "makeLLVMModule";
- printModule(fname,tgtname);
- break;
- case GenContents:
- if (fname.empty())
- fname = "makeLLVMModuleContents";
- printContents(fname,tgtname);
- break;
- case GenFunction:
- if (fname.empty())
- fname = "makeLLVMFunction";
- printFunction(fname,tgtname);
- break;
- case GenFunctions:
- printFunctions();
- break;
- case GenInline:
- if (fname.empty())
- fname = "makeLLVMInline";
- printInline(fname,tgtname);
- break;
- case GenVariable:
- if (fname.empty())
- fname = "makeLLVMVariable";
- printVariable(fname,tgtname);
- break;
- case GenType:
- if (fname.empty())
- fname = "makeLLVMType";
- printType(fname,tgtname);
- break;
- default:
- error("Invalid generation option");
- }
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+}
- return false;
+void CppWriter::printType(const std::string& fname,
+ const std::string& typeName) {
+ const Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
}
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
}
char CppWriter::ID = 0;
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index e42e9b3505d8..b6e4d654f4aa 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -145,8 +145,9 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg());
- if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ if (MBlaze::CPURegsRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23889b128ffe..1730b689d361 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -234,6 +234,24 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &R = F->getRegInfo();
MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring all successors and
+ // remaining instructions from the current block to the new block which
+ // will contain the Phi node for the select.
+ finish->splice(finish->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ finish->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(loop);
+ BB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
@@ -249,26 +267,6 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(IAMT)
.addMBB(finish);
- F->insert(It, loop);
- F->insert(It, finish);
-
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- finish->addSuccessor(*i);
-
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- BB->addSuccessor(loop);
- BB->addSuccessor(finish);
-
- // Next, add the finish block as a successor of the loop block
- loop->addSuccessor(finish);
- loop->addSuccessor(loop);
-
unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
@@ -298,12 +296,13 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(NAMT)
.addMBB(loop);
- BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
.addReg(IVAL).addMBB(BB)
.addReg(NDST).addMBB(loop);
// The pseudo instruction is no longer needed so remove it
- F->DeleteMachineInstr(MI);
+ MI->eraseFromParent();
return finish;
}
@@ -338,27 +337,23 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
}
- BuildMI(BB, dl, TII->get(Opc))
- .addReg(MI->getOperand(3).getReg())
- .addMBB(dneBB);
-
F->insert(It, flsBB);
F->insert(It, dneBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- dneBB->addSuccessor(*i);
+ // Transfer the remainder of BB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(BB);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
BB->addSuccessor(flsBB);
BB->addSuccessor(dneBB);
flsBB->addSuccessor(dneBB);
+ BuildMI(BB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
@@ -366,11 +361,12 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
// .addReg(MI->getOperand(2).getReg()).addMBB(BB);
- BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
.addReg(MI->getOperand(1).getReg()).addMBB(BB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return dneBB;
}
}
@@ -408,7 +404,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
}
@@ -439,10 +435,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue MBlazeTargetLowering::
LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue ResNode;
- EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
const Constant *C = N->getConstVal();
- SDValue Zero = DAG.getConstant(0, PtrVT);
DebugLoc dl = Op.getDebugLoc();
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
@@ -531,6 +525,7 @@ SDValue MBlazeTargetLowering::
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -562,7 +557,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -590,7 +585,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Create the frame index object for this incoming parameter
LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc, true, false);
+ LastArgStackLoc, true);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -623,7 +618,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// node so that legalize doesn't hack it.
unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -779,7 +774,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
(FirstStackArgLoc + VA.getLocMemOffset())));
@@ -810,7 +805,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
unsigned LiveReg = MF.addLiveIn(Reg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
- int FI = MFI->CreateFixedObject(4, 0, true, false);
+ int FI = MFI->CreateFixedObject(4, 0, true);
MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -841,6 +836,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
SDValue MBlazeTargetLowering::
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
@@ -869,7 +865,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 9f9ac899c6fc..5ec2563c555c 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -109,6 +109,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -117,6 +118,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual MachineBasicBlock *
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 4c4d86bd206d..6ff5825a26b6 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -110,15 +110,13 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(MBlaze::NOP));
}
-bool MBlazeInstrInfo::
-copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
- .addReg(SrcReg).addReg(MBlaze::R0);
- return true;
+ .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
}
void MBlazeInstrInfo::
@@ -141,54 +139,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0).addFrameIndex(FI);
}
-MachineInstr *MBlazeInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- MachineInstr *NewMI = NULL;
-
- switch (MI->getOpcode()) {
- case MBlaze::OR:
- case MBlaze::ADD:
- if ((MI->getOperand(0).isReg()) &&
- (MI->getOperand(2).isReg()) &&
- (MI->getOperand(2).getReg() == MBlaze::R0) &&
- (MI->getOperand(1).isReg())) {
- if (Ops[0] == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- }
- }
- break;
- }
-
- return NewMI;
-}
-
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
unsigned MBlazeInstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Can only insert uncond branches so far.
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, DebugLoc(), get(MBlaze::BRI)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB);
return 1;
}
@@ -209,12 +170,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
- MBlaze::CPURegsRegisterClass,
- MBlaze::CPURegsRegisterClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global base register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(MBlaze::R20);
RegInfo.addLiveIn(MBlaze::R20);
MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index c9fdc8877d68..f0743705f010 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -198,13 +198,12 @@ public:
/// Branch Analysis
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -217,18 +216,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index f15eea9507d6..8cafa8c519c6 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -148,22 +148,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-/// MBlaze Callee Saved Register Classes
-const TargetRegisterClass* const* MBlazeRegisterInfo::
-getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRC[] = {
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- 0
- };
-
- return CalleeSavedRC;
-}
-
BitVector MBlazeRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index b618bf4cf4c5..af97b0e2d79e 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -54,9 +54,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 3de173cc26ce..8f97d255077f 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -808,7 +808,7 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
std::string Name;
switch (Inst->getIntrinsicID()) {
case Intrinsic::vastart:
- Name = getValueName(Inst->getOperand(1));
+ Name = getValueName(Inst->getArgOperand(0));
Name.insert(Name.length()-1,"$valist");
// Obtain the argument handle.
printSimpleInstruction("ldloca",Name.c_str());
@@ -817,20 +817,20 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
"instance void [mscorlib]System.ArgIterator::.ctor"
"(valuetype [mscorlib]System.RuntimeArgumentHandle)");
// Save as pointer type "void*"
- printValueLoad(Inst->getOperand(1));
+ printValueLoad(Inst->getArgOperand(0));
printSimpleInstruction("ldloca",Name.c_str());
printIndirectSave(PointerType::getUnqual(
IntegerType::get(Inst->getContext(), 8)));
break;
case Intrinsic::vaend:
// Close argument list handle.
- printIndirectLoad(Inst->getOperand(1));
+ printIndirectLoad(Inst->getArgOperand(0));
printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
break;
case Intrinsic::vacopy:
// Copy "ArgIterator" valuetype.
- printIndirectLoad(Inst->getOperand(1));
- printIndirectLoad(Inst->getOperand(2));
+ printIndirectLoad(Inst->getArgOperand(0));
+ printIndirectLoad(Inst->getArgOperand(1));
printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
break;
default:
@@ -845,10 +845,11 @@ void MSILWriter::printCallInstruction(const Instruction* Inst) {
// Handle intrinsic function.
printIntrinsicCall(cast<IntrinsicInst>(Inst));
} else {
+ const CallInst *CI = cast<CallInst>(Inst);
// Load arguments to stack and call function.
- for (int I = 1, E = Inst->getNumOperands(); I!=E; ++I)
- printValueLoad(Inst->getOperand(I));
- printFunctionCall(Inst->getOperand(0),Inst);
+ for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I)
+ printValueLoad(CI->getArgOperand(I));
+ printFunctionCall(CI->getCalledFunction(), Inst);
}
}
@@ -1002,8 +1003,8 @@ void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
std::string Label = "leave$normal_"+utostr(getUniqID());
Out << ".try {\n";
// Load arguments
- for (int I = 3, E = Inst->getNumOperands(); I!=E; ++I)
- printValueLoad(Inst->getOperand(I));
+ for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I)
+ printValueLoad(Inst->getArgOperand(I));
// Print call instruction
printFunctionCall(Inst->getOperand(0),Inst);
// Save function result and leave "try" block
@@ -1280,7 +1281,7 @@ void MSILWriter::printLocalVariables(const Function& F) {
case Intrinsic::vaend:
case Intrinsic::vacopy:
isVaList = true;
- VaList = Inst->getOperand(1);
+ VaList = Inst->getArgOperand(0);
break;
default:
isVaList = false;
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7b328bb12c3d..3395e9fc3437 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -272,7 +272,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
AM.Base.Reg;
if (AM.GV)
- Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp,
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+ MVT::i16, AM.Disp,
0/*AM.SymbolFlags*/);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 403400e35add..a1703a3e78bf 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -278,6 +278,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -290,7 +291,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::Fast:
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
report_fatal_error("ISRs cannot be called directly");
return SDValue();
@@ -369,7 +370,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
<< "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -387,6 +388,7 @@ SDValue
MSP430TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location
@@ -421,7 +423,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad.
@@ -447,6 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -471,7 +474,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -529,7 +532,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
@@ -642,7 +645,8 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
// Create the TargetGlobalAddress node, folding in the constant offset.
- SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
getPointerTy(), Result);
}
@@ -888,7 +892,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- true, false);
+ true);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -1070,7 +1074,10 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
// Update machine-CFG edges by transferring all successors of the current
// block to the block containing instructions after shift.
- RemBB->transferSuccessors(BB);
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
// Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
BB->addSuccessor(LoopBB);
@@ -1116,11 +1123,11 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
// RemBB:
// DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
- BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
.addReg(SrcReg).addMBB(BB)
.addReg(ShiftReg2).addMBB(LoopBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return RemBB;
}
@@ -1158,18 +1165,22 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII.get(MSP430::JCC))
- .addMBB(copy1MBB)
- .addImm(MI->getOperand(3).getImm());
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
- copy1MBB->transferSuccessors(BB);
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(copy1MBB);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to copy1MBB
@@ -1182,11 +1193,11 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = copy1MBB;
- BuildMI(BB, dl, TII.get(MSP430::PHI),
+ BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 01c5071622a7..673c5433b96e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -127,6 +127,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -155,6 +156,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -163,6 +165,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 18226ab0f4ab..df28d07f5d71 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -83,27 +83,20 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
llvm_unreachable("Cannot store this register to stack slot!");
}
-bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == SrcRC) {
- unsigned Opc;
- if (DestRC == &MSP430::GR16RegClass) {
- Opc = MSP430::MOV16rr;
- } else if (DestRC == &MSP430::GR8RegClass) {
- Opc = MSP430::MOV8rr;
- } else {
- return false;
- }
-
- BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg);
- return true;
- }
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV16rr;
+ else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV8rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
- return false;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool
@@ -330,10 +323,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned
MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc DL;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 842b4cb06e41..ebbda1aeef51 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -49,11 +49,10 @@ public:
///
virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
- bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -93,7 +92,8 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
};
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 6b9a2f2f29f4..8792b2236855 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -25,13 +25,16 @@ class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
-def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>]>;
-def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisI8<2>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
@@ -46,7 +49,7 @@ def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
def MSP430callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
[SDNPHasChain, SDNPOutFlag]>;
@@ -55,8 +58,10 @@ def MSP430callseq_end :
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
-def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
-def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+ [SDNPHasChain, SDNPInFlag]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+ [SDNPInFlag]>;
def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
@@ -117,14 +122,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
}
let usesCustomInserter = 1 in {
- def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc),
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
[(set GR8:$dst,
- (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>;
- def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc),
+ (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
"# Select16 PSEUDO",
[(set GR16:$dst,
- (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+ (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
let Defs = [SRW] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
@@ -330,60 +335,60 @@ def MOV16mm : I16mm<0x0,
//===----------------------------------------------------------------------===//
// Arithmetic Instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
let Defs = [SRW] in {
let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
def ADD8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def ADD16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
(implicit SRW)]>;
}
def ADD8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def ADD16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"add.b\t{@$base+, $dst}", []>;
def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"add.w\t{@$base+, $dst}", []>;
}
def ADD8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (add GR8:$src, imm:$src2)),
(implicit SRW)]>;
def ADD16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (add GR16:$src, imm:$src2)),
(implicit SRW)]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADD8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"add.b\t{$src, $dst}",
@@ -424,40 +429,40 @@ let Uses = [SRW] in {
let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
def ADC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def ADC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
(implicit SRW)]>;
} // isCommutable
def ADC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
(implicit SRW)]>;
def ADC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
(implicit SRW)]>;
def ADC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def ADC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"addc.b\t{$src, $dst}",
@@ -498,52 +503,52 @@ def ADC16mm : I8mm<0x0,
let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
def AND8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def AND16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
(implicit SRW)]>;
}
def AND8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (and GR8:$src, imm:$src2)),
(implicit SRW)]>;
def AND16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (and GR16:$src, imm:$src2)),
(implicit SRW)]>;
def AND8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def AND16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"and.b\t{@$base+, $dst}", []>;
def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"and.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def AND8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"and.b\t{$src, $dst}",
@@ -582,46 +587,46 @@ def AND16mm : I16mm<0x0,
let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
def OR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+ [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
def OR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+ [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
}
def OR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+ [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
def OR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+ [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
def OR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+ [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
def OR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+ [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"bis.b\t{@$base+, $dst}", []>;
def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"bis.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def OR8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"bis.b\t{$src, $dst}",
@@ -654,24 +659,24 @@ def OR16mm : I16mm<0x0,
// bic does not modify condition codes
def BIC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>;
+ [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
def BIC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>;
+ [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
def BIC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>;
+ [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
def BIC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>;
+ [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def BIC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"bic.b\t{$src, $dst}",
@@ -695,52 +700,52 @@ def BIC16mm : I16mm<0x0,
let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
def XOR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def XOR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
(implicit SRW)]>;
}
def XOR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
(implicit SRW)]>;
def XOR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
(implicit SRW)]>;
def XOR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def XOR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"xor.b\t{@$base+, $dst}", []>;
def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"xor.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def XOR8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"xor.b\t{$src, $dst}",
@@ -777,51 +782,51 @@ def XOR16mm : I16mm<0x0,
def SUB8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def SUB16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
(implicit SRW)]>;
def SUB8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
(implicit SRW)]>;
def SUB16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
(implicit SRW)]>;
def SUB8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def SUB16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"sub.b\t{@$base+, $dst}", []>;
def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"sub.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SUB8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"sub.b\t{$src, $dst}",
@@ -860,39 +865,39 @@ def SUB16mm : I16mm<0x0,
let Uses = [SRW] in {
def SBC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def SBC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
(implicit SRW)]>;
def SBC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
(implicit SRW)]>;
def SBC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
(implicit SRW)]>;
def SBC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def SBC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SBC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"subc.b\t{$src, $dst}",
@@ -985,59 +990,59 @@ def SWPB16r : II16r<0x0,
"swpb\t$dst",
[(set GR16:$dst, (bswap GR16:$src))]>;
-} // isTwoAddress = 1
+} // Constraints = "$src = $dst"
// Integer comparisons
let Defs = [SRW] in {
def CMP8rr : I8rr<0x0,
- (outs), (ins GR8:$src1, GR8:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>;
+ (outs), (ins GR8:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
def CMP16rr : I16rr<0x0,
- (outs), (ins GR16:$src1, GR16:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>;
+ (outs), (ins GR16:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
def CMP8ri : I8ri<0x0,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
def CMP16ri : I16ri<0x0,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
def CMP8mi : I8mi<0x0,
- (outs), (ins memsrc:$src1, i8imm:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1),
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
(i8 imm:$src2)), (implicit SRW)]>;
def CMP16mi : I16mi<0x0,
- (outs), (ins memsrc:$src1, i16imm:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1),
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
(i16 imm:$src2)), (implicit SRW)]>;
def CMP8rm : I8rm<0x0,
- (outs), (ins GR8:$src1, memsrc:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp GR8:$src1, (load addr:$src2)),
+ (outs), (ins GR8:$src, memsrc:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, (load addr:$src2)),
(implicit SRW)]>;
def CMP16rm : I16rm<0x0,
- (outs), (ins GR16:$src1, memsrc:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp GR16:$src1, (load addr:$src2)),
+ (outs), (ins GR16:$src, memsrc:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, (load addr:$src2)),
(implicit SRW)]>;
def CMP8mr : I8mr<0x0,
- (outs), (ins memsrc:$src1, GR8:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1), GR8:$src2),
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR8:$src2),
(implicit SRW)]>;
def CMP16mr : I16mr<0x0,
- (outs), (ins memsrc:$src1, GR16:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1), GR16:$src2),
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR16:$src2),
(implicit SRW)]>;
@@ -1045,71 +1050,71 @@ def CMP16mr : I16mr<0x0,
// Note that the C condition is set differently than when using CMP.
let isCommutable = 1 in {
def BIT8rr : I8rr<0x0,
- (outs), (ins GR8:$src1, GR8:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su GR8:$src1, GR8:$src2), 0),
+ (outs), (ins GR8:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
(implicit SRW)]>;
def BIT16rr : I16rr<0x0,
- (outs), (ins GR16:$src1, GR16:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su GR16:$src1, GR16:$src2), 0),
+ (outs), (ins GR16:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
(implicit SRW)]>;
}
def BIT8ri : I8ri<0x0,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su GR8:$src1, imm:$src2), 0),
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
(implicit SRW)]>;
def BIT16ri : I16ri<0x0,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su GR16:$src1, imm:$src2), 0),
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
(implicit SRW)]>;
def BIT8rm : I8rm<0x0,
- (outs), (ins GR8:$src1, memdst:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su GR8:$src1, (load addr:$src2)), 0),
+ (outs), (ins GR8:$src, memdst:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
(implicit SRW)]>;
def BIT16rm : I16rm<0x0,
- (outs), (ins GR16:$src1, memdst:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su GR16:$src1, (load addr:$src2)), 0),
+ (outs), (ins GR16:$src, memdst:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
(implicit SRW)]>;
def BIT8mr : I8mr<0x0,
- (outs), (ins memsrc:$src1, GR8:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), GR8:$src2), 0),
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
(implicit SRW)]>;
def BIT16mr : I16mr<0x0,
- (outs), (ins memsrc:$src1, GR16:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), GR16:$src2), 0),
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
(implicit SRW)]>;
def BIT8mi : I8mi<0x0,
- (outs), (ins memsrc:$src1, i8imm:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), (i8 imm:$src2)), 0),
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
(implicit SRW)]>;
def BIT16mi : I16mi<0x0,
- (outs), (ins memsrc:$src1, i16imm:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), (i16 imm:$src2)), 0),
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
(implicit SRW)]>;
def BIT8mm : I8mm<0x0,
- (outs), (ins memsrc:$src1, memsrc:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su (i8 (load addr:$src1)),
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (i8 (load addr:$src)),
(load addr:$src2)),
0),
(implicit SRW)]>;
def BIT16mm : I16mm<0x0,
- (outs), (ins memsrc:$src1, memsrc:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su (i16 (load addr:$src1)),
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (i16 (load addr:$src)),
(load addr:$src2)),
0),
(implicit SRW)]>;
@@ -1134,12 +1139,12 @@ def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)),
- (ADD16ri GR16:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)),
- (ADD16ri GR16:$src1, texternalsym:$src2)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)),
- (ADD16ri GR16:$src1, tblockaddress:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src, tblockaddress:$src2)>;
def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
(MOV16mi addr:$dst, tglobaladdr:$src)>;
@@ -1155,45 +1160,45 @@ def : Pat<(MSP430call (i16 texternalsym:$dst)),
(CALLi texternalsym:$dst)>;
// add and sub always produce carry
-def : Pat<(addc GR16:$src1, GR16:$src2),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(addc GR16:$src1, (load addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(addc GR16:$src1, imm:$src2),
- (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(addc GR16:$src, GR16:$src2),
+ (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+ (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+ (ADD16ri GR16:$src, imm:$src2)>;
def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
(ADD16mr addr:$dst, GR16:$src)>;
def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
(ADD16mm addr:$dst, addr:$src)>;
-def : Pat<(addc GR8:$src1, GR8:$src2),
- (ADD8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(addc GR8:$src1, (load addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(addc GR8:$src1, imm:$src2),
- (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(addc GR8:$src, GR8:$src2),
+ (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+ (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+ (ADD8ri GR8:$src, imm:$src2)>;
def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
(ADD8mr addr:$dst, GR8:$src)>;
def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
(ADD8mm addr:$dst, addr:$src)>;
-def : Pat<(subc GR16:$src1, GR16:$src2),
- (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(subc GR16:$src1, (load addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(subc GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(subc GR16:$src, GR16:$src2),
+ (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+ (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+ (SUB16ri GR16:$src, imm:$src2)>;
def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
(SUB16mr addr:$dst, GR16:$src)>;
def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
(SUB16mm addr:$dst, addr:$src)>;
-def : Pat<(subc GR8:$src1, GR8:$src2),
- (SUB8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(subc GR8:$src1, (load addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(subc GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(subc GR8:$src, GR8:$src2),
+ (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+ (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+ (SUB8ri GR8:$src, imm:$src2)>;
def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
(SUB8mr addr:$dst, GR8:$src)>;
def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
@@ -1201,6 +1206,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
// peephole patterns
def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
-def : Pat<(MSP430cmp (trunc (and_su GR16:$src1, GR16:$src2)), 0),
- (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit),
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
(EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 0cae26714bfc..608ca49fcf78 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -71,48 +71,6 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
-const TargetRegisterClass *const *
-MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- const Function* F = MF->getFunction();
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesFP[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesIntrFP[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, 0
- };
-
- if (hasFP(*MF))
- return (F->getCallingConv() == CallingConv::MSP430_INTR ?
- CalleeSavedRegClassesIntrFP : CalleeSavedRegClassesFP);
- else
- return (F->getCallingConv() == CallingConv::MSP430_INTR ?
- CalleeSavedRegClassesIntr : CalleeSavedRegClasses);
-}
-
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -270,8 +228,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
- int ATTRIBUTE_UNUSED FrameIdx =
- MF.getFrameInfo()->CreateFixedObject(2, -4, true, false);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for FPW register must be last in order to be found!");
}
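Two things change in the hunk above: MachineFrameInfo::CreateFixedObject loses its final boolean argument (the same signature change shows up again in the Mips hunks further down), and the ATTRIBUTE_UNUSED macro on FrameIdx is replaced by a plain (void) cast, since the variable is only read by the assert. A minimal standalone sketch of that cast idiom, with illustrative names that are not from the tree:

#include <cassert>

static void reserveFpwSlot() {
  int FrameIdx = -1;   // stand-in for MFI->CreateFixedObject(2, -4, true)
  (void)FrameIdx;      // the only real use is the assert below, which compiles
                       // away in NDEBUG builds, so the cast keeps
                       // -Wunused-variable quiet without ATTRIBUTE_UNUSED
  assert(FrameIdx == -1 && "Slot for FPW register must be last!");
}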
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index c8684dfdb041..6e58d3116d27 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -36,9 +36,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 4ef017ab9295..2037a9114559 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -180,7 +180,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 4d7fe4cc8262..8ae05b75e919 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -133,8 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg());
- if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ if (Mips::CPURegsRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
else
FPUBitmask |= (1 << RegNum);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index e979c3fe69fc..b6ff2c371d5c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -284,6 +284,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
// Emit the right instruction according to the type of the operands compared
if (isFPCmp) {
@@ -296,20 +308,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
.addReg(Mips::ZERO).addMBB(sinkMBB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -322,11 +320,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
}
@@ -490,21 +489,21 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
// %gp_rel relocation
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GPREL);
SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
}
// %hi/%lo relocation
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_ABS_HILO);
SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GOT);
SDValue ResNode = DAG.getLoad(MVT::i32, dl,
DAG.getEntryNode(), GA, NULL, 0,
@@ -768,6 +767,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -787,7 +787,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// the stack (even if less than 4 are used as arguments)
if (Subtarget->isABI_O32()) {
int VTsize = EVT(MVT::i32).getSizeInBits()/8;
- MFI->CreateFixedObject(VTsize, (VTsize*3), true, false);
+ MFI->CreateFixedObject(VTsize, (VTsize*3), true);
CCInfo.AnalyzeCallOperands(Outs,
isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
} else
@@ -808,7 +808,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
CCValAssign &VA = ArgLocs[i];
// Promote the value if needed.
@@ -857,7 +857,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// if O32 ABI is used. For EABI the first address is zero.
LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc, true, false);
+ LastArgStackLoc, true);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -889,7 +889,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// node so that legalize doesn't hack it.
unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -929,7 +929,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the frame index only once. SPOffset here can be anything
// (this will be fixed on processFunctionBeforeFrameFinalized)
if (MipsFI->getGPStackOffset() == -1) {
- FI = MFI->CreateFixedObject(4, 0, true, false);
+ FI = MFI->CreateFixedObject(4, 0, true);
MipsFI->setGPFI(FI);
}
MipsFI->setGPStackOffset(LastArgStackLoc);
@@ -1098,7 +1098,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
MipsFI->recordLoadArgsFI(FI, -(ArgSize+
(FirstStackArgLoc + VA.getLocMemOffset())));
@@ -1137,7 +1137,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
- int FI = MFI->CreateFixedObject(4, 0, true, false);
+ int FI = MFI->CreateFixedObject(4, 0, true);
MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -1169,6 +1169,7 @@ SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
@@ -1198,7 +1199,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
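LowerCall and LowerReturn (here and in the PIC16 lowering later in this patch) grow a separate OutVals parameter: the SDValues are no longer carried inside Outs, which keeps only the flags and value types used for calling-convention analysis. A short fragment of the resulting return lowering, assuming the usual RVLocs, dl and Flag locals of these routines:

// Fragment only -- per-value data now comes from OutVals[i], not Outs[i].Val.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
  CCValAssign &VA = RVLocs[i];
  assert(VA.isRegLoc() && "Can only return in registers!");
  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
  Flag = Chain.getValue(1);  // chain the copies together through the flag
}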
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2de489a2643..460747bf5438 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -128,6 +129,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual MachineBasicBlock *
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 4005e35b35b1..6c09a3e10785 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -127,61 +127,75 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
-bool MipsInstrInfo::
-copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
+void MipsInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
+ bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg);
+
+ // CPU-CPU is the most common.
+ if (DestCPU && SrcCPU) {
+ BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (DestRC != SrcRC) {
-
- // Copy to/from FCR31 condition register
- if ((DestRC == Mips::CPURegsRegisterClass) &&
- (SrcRC == Mips::CCRRegisterClass))
- BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg);
- else if ((DestRC == Mips::CCRRegisterClass) &&
- (SrcRC == Mips::CPURegsRegisterClass))
- BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg);
-
- // Moves between coprocessors and cpu
- else if ((DestRC == Mips::CPURegsRegisterClass) &&
- (SrcRC == Mips::FGR32RegisterClass))
- BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg);
- else if ((DestRC == Mips::FGR32RegisterClass) &&
- (SrcRC == Mips::CPURegsRegisterClass))
- BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
-
- // Move from/to Hi/Lo registers
- else if ((DestRC == Mips::HILORegisterClass) &&
- (SrcRC == Mips::CPURegsRegisterClass)) {
- unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO;
- BuildMI(MBB, I, DL, get(Opc), DestReg);
- } else if ((SrcRC == Mips::HILORegisterClass) &&
- (DestRC == Mips::CPURegsRegisterClass)) {
- unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
- BuildMI(MBB, I, DL, get(Opc), DestReg);
- } else
- // Can't copy this register
- return false;
+ // Copy to CPU from other registers.
+ if (DestCPU) {
+ if (Mips::CCRRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SrcReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+ else if (SrcReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
+ else
+ llvm_unreachable("Copy to CPU from invalid register");
+ return;
+ }
- return true;
+ // Copy to other registers from CPU.
+ if (SrcCPU) {
+ if (Mips::CCRRegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MTHI))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MTLO))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Copy from CPU to invalid register");
+ return;
}
- if (DestRC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
- .addReg(SrcReg);
- else if (DestRC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg);
- else if (DestRC == Mips::AFGR64RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg);
- else if (DestRC == Mips::CCRRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg);
- else
- // Can't copy this register
- return false;
+ if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- return true;
+ if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Cannot copy registers");
}
void MipsInstrInfo::
@@ -247,80 +261,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Register class not handled!");
}
-MachineInstr *MipsInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const
-{
- if (Ops.size() != 1) return NULL;
-
- MachineInstr *NewMI = NULL;
-
- switch (MI->getOpcode()) {
- case Mips::ADDu:
- if ((MI->getOperand(0).isReg()) &&
- (MI->getOperand(1).isReg()) &&
- (MI->getOperand(1).getReg() == Mips::ZERO) &&
- (MI->getOperand(2).isReg())) {
- if (Ops[0] == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(2).getReg();
- bool isKill = MI->getOperand(2).isKill();
- bool isUndef = MI->getOperand(2).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- }
- }
- break;
- case Mips::FMOV_S32:
- case Mips::FMOV_D32:
- if ((MI->getOperand(0).isReg()) &&
- (MI->getOperand(1).isReg())) {
- const TargetRegisterClass
- *RC = RI.getRegClass(MI->getOperand(0).getReg());
- unsigned StoreOpc, LoadOpc;
- bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1();
-
- if (RC == Mips::FGR32RegisterClass) {
- LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
- } else {
- assert(RC == Mips::AFGR64RegisterClass);
- // Mips1 doesn't have ldc/sdc instructions.
- if (IsMips1) break;
- LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
- }
-
- if (Ops[0] == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(2).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI) ;
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- }
- }
- break;
- }
-
- return NewMI;
-}
-
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
@@ -520,9 +460,8 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned MipsInstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
@@ -531,18 +470,18 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) { // One way branch.
if (Cond.empty()) {
// Unconditional branch?
- BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
} else {
// Conditional branch.
unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
const TargetInstrDesc &TID = get(Opc);
if (TID.getNumOperands() == 3)
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
.addReg(Cond[2].getReg())
.addMBB(TBB);
else
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
.addMBB(TBB);
}
@@ -554,12 +493,12 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
const TargetInstrDesc &TID = get(Opc);
if (TID.getNumOperands() == 3)
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
.addMBB(TBB);
else
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
return 2;
}
@@ -621,12 +560,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP,
- Mips::CPURegsRegisterClass,
- Mips::CPURegsRegisterClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global base register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Mips::GP);
RegInfo.addLiveIn(Mips::GP);
MipsFI->setGlobalBaseReg(GlobalBaseReg);
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 7919d9a03853..d6f87f9b0ce8 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -204,13 +204,12 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -223,18 +222,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 2b9e94191dba..5337c9fb816a 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -541,7 +541,7 @@ let Predicates = [HasSwap] in {
def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
-let Predicates = [HasCondMov], isTwoAddress = 1 in {
+let Predicates = [HasCondMov], Constraints = "$F = $dst" in {
def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 5e719af871d2..e15f0a58e501 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -116,34 +116,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const
return BitMode32CalleeSavedRegs;
}
-/// Mips Callee Saved Register Classes
-const TargetRegisterClass* const*
-MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
-{
- static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = {
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
- };
-
- static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = {
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
- };
-
- if (Subtarget.isSingleFloat())
- return SingleFloatOnlyCalleeSavedRC;
- else
- return BitMode32CalleeSavedRC;
-}
-
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const
{
@@ -279,7 +251,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
- if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ if (!Mips::CPURegsRegisterClass->contains(Reg))
break;
MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
TopCPUSavedRegOff = StackOffset;
@@ -311,7 +284,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
// Adjust FPU Callee Saved Registers Area. This Area must be
// aligned to the default Stack Alignment requirements.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegisterClass->contains(Reg))
continue;
MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
TopFPUSavedRegOff = StackOffset;
@@ -528,4 +502,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
}
#include "MipsGenRegisterInfo.inc"
-
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index bc857b85cabe..b500a650f7cc 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,9 +42,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index f479f4626fd7..54a6a28992bf 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -672,7 +672,8 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N,
// FIXME there isn't really debug info here
DebugLoc dl = G->getDebugLoc();
- SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8,
+ SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(),
+ MVT::i8,
G->getOffset());
SDValue Offset = DAG.getConstant(0, MVT::i8);
@@ -1120,6 +1121,7 @@ SDValue PIC16TargetLowering::
LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG) const {
unsigned NumOps = Outs.size();
@@ -1136,7 +1138,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
unsigned RetVals = Ins.size();
for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
// Get the arguments
- Arg = Outs[i].Val;
+ Arg = OutVals[i];
Ops.clear();
Ops.push_back(Chain);
@@ -1158,6 +1160,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue PIC16TargetLowering::
LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
unsigned NumOps = Outs.size();
std::string Name;
@@ -1183,7 +1186,7 @@ LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
for (unsigned i=0, Offset = 0; i<NumOps; i++) {
// Get the argument
- Arg = Outs[i].Val;
+ Arg = OutVals[i];
StoreOffset = (Offset + AddressOffset);
// Store the argument on frame
@@ -1282,6 +1285,7 @@ SDValue
PIC16TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// Number of values to return
@@ -1298,7 +1302,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain,
SDValue BS = DAG.getConstant(1, MVT::i8);
SDValue RetVal;
for(unsigned i=0;i<NumRet; ++i) {
- RetVal = Outs[i].Val;
+ RetVal = OutVals[i];
Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
ES, BS,
DAG.getConstant (i, MVT::i8));
@@ -1374,6 +1378,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1428,7 +1433,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Considering the GlobalAddressNode case here.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, MVT::i8);
+ Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8);
Name = G->getGlobal()->getName();
} else {// Considering the ExternalSymbol case here
ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
@@ -1461,12 +1466,13 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue CallArgs;
if (IsDirectCall) {
CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag,
- Outs, dl, DAG);
+ Outs, OutVals, dl, DAG);
Chain = getChain(CallArgs);
OperFlag = getOutFlag(CallArgs);
} else {
CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo,
- DataAddr_Hi, Outs, Ins, dl, DAG);
+ DataAddr_Hi, Outs, OutVals, Ins,
+ dl, DAG);
Chain = getChain(CallArgs);
OperFlag = getOutFlag(CallArgs);
}
@@ -1791,14 +1797,14 @@ static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
CC == ISD::SETNE &&
(LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
isa<ConstantSDNode>(LHS.getOperand(0)) &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
@@ -1928,15 +1934,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
@@ -1953,11 +1956,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(PIC16::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
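This is the same CFG-rewiring pattern the Mips select lowering adopted earlier in the patch: splice() moves everything after the pseudo into sinkMBB, transferSuccessorsAndUpdatePHIs() hands over the successor edges and fixes any PHIs that named BB, and the merge PHI is then built at the top of sinkMBB before the pseudo is erased. A condensed fragment of the shared shape (branch emission and the copy0MBB wiring are elided; the PHI operand indices differ slightly between the two targets):

// Fragment only: BB, MI, It, dl, TII, LLVM_BB and thisMBB come from the
// surrounding EmitInstrWithCustomInserter, as in the hunks above.
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);

sinkMBB->splice(sinkMBB->begin(), BB,
                llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);

// ... emit the conditional branch in BB and wire copy0MBB -> sinkMBB ...

BuildMI(*sinkMBB, sinkMBB->begin(), dl, TII.get(PIC16::PHI),
        MI->getOperand(0).getReg())
    .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
MI->eraseFromParent();  // replaces F->DeleteMachineInstr(MI)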
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index eea17f898365..0a7506cb497f 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -106,12 +106,14 @@ namespace llvm {
SDValue
LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue
LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG) const;
@@ -143,6 +145,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -151,6 +154,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const;
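
Aside (illustration, not part of the patch): the header change above is one instance of a change threaded through every target in this diff (PIC16, PowerPC, Sparc): ISD::OutputArg no longer carries the outgoing SDValue itself. Outs[i] keeps only the description of argument i (its VT and flags, hence Outs[i].Val.getValueType() becoming Outs[i].VT elsewhere in the patch), while the value travels in the parallel OutVals vector, so Outs and OutVals must always be indexed in lockstep. A toy, non-LLVM sketch of that shape:

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct OutputArg { std::string VT; bool IsSigned; };  // descriptor only
using SDValue = std::string;                          // stand-in for a DAG value

// Walk the descriptor vector and the value vector with the same index,
// as LowerReturn/LowerCall do after this patch.
static void lowerReturn(const std::vector<OutputArg> &Outs,
                        const std::vector<SDValue> &OutVals) {
  for (std::size_t i = 0, e = Outs.size(); i != e; ++i)
    std::printf("copy %s (type %s) into a return register\n",
                OutVals[i].c_str(), Outs[i].VT.c_str());
}

int main() {
  std::vector<OutputArg> Outs = {{"i32", true}, {"f64", false}};
  std::vector<SDValue> OutVals = {"%sum", "%ratio"};
  lowerReturn(Outs, OutVals);
  return 0;
}
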
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 793dd9f029f9..e784f746f7f9 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -151,25 +151,20 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
llvm_unreachable("Can't load this register from stack slot");
}
-bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- if (DestRC == PIC16::FSR16RegisterClass) {
- BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
- return true;
- }
-
- if (DestRC == PIC16::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg);
- return true;
- }
+void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PIC16::FSR16RegClass.contains(DestReg, SrcReg))
+ Opc = PIC16::copy_fsr;
+ else if (PIC16::GPRRegClass.contains(DestReg, SrcReg))
+ Opc = PIC16::copy_w;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
- // Not yet supported.
- return false;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
@@ -196,15 +191,15 @@ bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
unsigned PIC16InstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
if (FBB == 0) { // One way branch.
if (Cond.empty()) {
// Unconditional branch?
- DebugLoc dl;
- BuildMI(&MBB, dl, get(PIC16::br_uncond)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB);
}
return 1;
}
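
Aside (illustration, not part of the patch): the hunk above swaps copyRegToReg, which reported unsupported copies by returning false, for copyPhysReg, which returns void, picks the opcode by asking a register class whether it contains both physical registers, treats any other pairing as unreachable, and forwards the source's kill state. The same shape appears again in the PowerPC hunks below. A toy, non-LLVM sketch of that contract (RegClass and the opcode strings are invented for the example):

#include <cstdio>
#include <cstdlib>
#include <set>

struct RegClass {
  std::set<unsigned> Regs;
  // True if both physical registers belong to this class.
  bool contains(unsigned A, unsigned B) const {
    return Regs.count(A) != 0 && Regs.count(B) != 0;
  }
};

static const RegClass FSR16 = {{1, 2}};
static const RegClass GPR   = {{10, 11, 12}};

// Copies either succeed or are a hard error; callers no longer check a bool.
static void copyPhysReg(unsigned DestReg, unsigned SrcReg, bool KillSrc) {
  const char *Opc;
  if (FSR16.contains(DestReg, SrcReg))
    Opc = "copy_fsr";
  else if (GPR.contains(DestReg, SrcReg))
    Opc = "copy_w";
  else {
    std::fprintf(stderr, "impossible reg-to-reg copy\n");
    std::abort();
  }
  std::printf("%s r%u <- r%u%s\n", Opc, DestReg, SrcReg,
              KillSrc ? " (kill)" : "");
}

int main() {
  copyPhysReg(10, 11, /*KillSrc=*/true);
  copyPhysReg(1, 2, /*KillSrc=*/false);
  return 0;
}
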
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index 40a4cb4ddb2d..a3a77f11ba16 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -57,12 +57,10 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual bool isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
@@ -70,7 +68,8 @@ public:
virtual
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
index 24df251b5088..86d36cb76ee4 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.td
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -134,7 +134,7 @@ include "PIC16InstrFormats.td"
//===----------------------------------------------------------------------===//
// W = W Op F : Load the value from F and do Op to W.
-let isTwoAddress = 1, mayLoad = 1 in
+let Constraints = "$src = $dst", mayLoad = 1 in
class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
ByteFormat<OpCode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
@@ -146,7 +146,7 @@ class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
// F = F Op W : Load the value from F, do op with W and store in F.
// This insn class is not marked as TwoAddress because the reg is
// being used as a source operand only. (Remember a TwoAddress insn
-// needs a copyRegToReg.)
+// needs a copy.)
let mayStore = 1 in
class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
ByteFormat<OpCode, (outs),
@@ -160,7 +160,7 @@ class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
)]>;
// W = W Op L : Do Op of L with W and place result in W.
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
LiteralFormat<opcode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$literal),
@@ -220,7 +220,7 @@ def set_fsrlo:
"movwf ${fsr}L",
[]>;
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def set_fsrhi:
ByteFormat<0, (outs FSR16:$dst),
(ins FSR16:$src, GPR:$val),
@@ -234,8 +234,8 @@ def set_pclath:
[(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
//----------------------------
-// copyRegToReg
-// copyRegToReg insns. These are dummy. They should always be deleted
+// copyPhysReg
+// copyPhysReg insns. These are dummy. They should always be deleted
// by the optimizer and never be present in the final generated code.
// if they are, then we have to write correct macros for these insns.
//----------------------------
@@ -362,7 +362,7 @@ def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry.
}
// W -= [F] ; load from F and sub the value from W.
-let isTwoAddress = 1, mayLoad = 1 in
+let Constraints = "$src = $dst", mayLoad = 1 in
class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
ByteFormat<OpCode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
@@ -418,7 +418,7 @@ def orlw : BinOpWL<0, "iorlw", or>;
// sublw
// W = C - W ; sub W from literal. (Without borrow).
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
LiteralFormat<opcode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$literal),
@@ -426,7 +426,7 @@ class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
[(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
// subwl
// W = W - C ; sub literal from W (Without borrow).
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
LiteralFormat<opcode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$literal),
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index ab81ed1bca99..241170b11c2a 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -117,7 +117,7 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
DebugLoc dl = I->getDebugLoc();
BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$");
Changed = true;
- PageChanged = 0;
+ PageChanged = 0;
}
}
}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
index c282521be324..27f1cf572ae6 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -150,8 +150,8 @@ void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) {
// For PIC16, automatic variables of a function are emitted as globals.
-// Clone the auto variables of a function and put them in ValueMap,
-// this ValueMap will be used while
+// Clone the auto variables of a function and put them in VMap;
+// this VMap will be used while
+// cloning the code of the function itself.
//
void PIC16Cloner::CloneAutos(Function *F) {
@@ -160,11 +160,11 @@ void PIC16Cloner::CloneAutos(Function *F) {
Module *M = F->getParent();
Module::GlobalListType &Globals = M->getGlobalList();
- // Clear the leftovers in ValueMap by any previous cloning.
- ValueMap.clear();
+ // Clear the leftovers in VMap by any previous cloning.
+ VMap.clear();
// Find the auto globls for this function and clone them, and put them
- // in ValueMap.
+ // in VMap.
std::string FnName = F->getName().str();
std::string VarName, ClonedVarName;
for (Module::global_iterator I = M->global_begin(), E = M->global_end();
@@ -182,8 +182,8 @@ void PIC16Cloner::CloneAutos(Function *F) {
// Add these new globals to module's globals list.
Globals.push_back(ClonedGV);
- // Update ValueMap.
- ValueMap[GV] = ClonedGV;
+ // Update VMap.
+ VMap[GV] = ClonedGV;
}
}
}
@@ -236,10 +236,10 @@ void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) {
}
// Clone the given function and return it.
-// Note: it uses the ValueMap member of the class, which is already populated
+// Note: it uses the VMap member of the class, which is already populated
// by cloneAutos by the time we reach here.
-// FIXME: Should we just pass ValueMap's ref as a parameter here? rather
-// than keeping the ValueMap as a member.
+// FIXME: Should we just pass a reference to VMap as a parameter here,
+// rather than keeping VMap as a member?
Function *
PIC16Cloner::cloneFunction(Function *OrgF) {
Function *ClonedF;
@@ -252,11 +252,11 @@ PIC16Cloner::cloneFunction(Function *OrgF) {
}
// Clone does not exist.
- // First clone the autos, and populate ValueMap.
+ // First clone the autos, and populate VMap.
CloneAutos(OrgF);
// Now create the clone.
- ClonedF = CloneFunction(OrgF, ValueMap);
+ ClonedF = CloneFunction(OrgF, VMap);
// The new function should be for interrupt line. Therefore should have
// the name suffixed with IL and section attribute marked with IL.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
index 24c11527b03c..e8b5aa45cdca 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -15,7 +15,7 @@
#ifndef PIC16CLONER_H
#define PIC16CLONER_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
using namespace llvm;
using std::vector;
@@ -72,7 +72,7 @@ namespace llvm {
// the corresponding cloned auto variable of the cloned function.
// This value map is passed during the function cloning so that all the
// uses of auto variables be updated properly.
- DenseMap<const Value*, Value*> ValueMap;
+ ValueMap<const Value*, Value*> VMap;
// Map of a already cloned functions.
map<Function *, Function *> ClonedFunctionMap;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index 30a1d4a25d3b..dff98d12c2ae 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -35,13 +35,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-// PIC16 Callee Saved Reg Classes
-const TargetRegisterClass* const*
-PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
- return CalleeSavedRegClasses;
-}
-
BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
return Reserved;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 6a9a038feda9..5536a617d2be 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -41,10 +41,6 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual const unsigned*
getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- // PIC16 callee saved register classes
- virtual const TargetRegisterClass* const *
- getCalleeSavedRegClasses(const MachineFunction *MF) const;
-
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 66dfd4b73794..db11fdeb7c1e 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -78,7 +78,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
isLoad = TID.mayLoad();
isStore = TID.mayStore();
- unsigned TSFlags = TID.TSFlags;
+ uint64_t TSFlags = TID.TSFlags;
isFirst = TSFlags & PPCII::PPC970_First;
isSingle = TSFlags & PPCII::PPC970_Single;
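
Aside (illustration, not part of the patch): the one-character PPCHazardRecognizers change above widens the local copy of TID.TSFlags from unsigned to uint64_t. That only matters if target flag bits can sit above bit 31, but when they do, storing the flags word into a 32-bit unsigned drops them silently and every mask test against a high flag quietly reads false. The bit position below is made up; the snippet only demonstrates that failure mode.

#include <cstdint>
#include <iostream>

int main() {
  const std::uint64_t HighFlag = 1ULL << 36;  // hypothetical flag above bit 31
  const std::uint64_t TSFlags  = HighFlag | 0x7;

  unsigned Narrow    = TSFlags;  // old code: high bits silently truncated
  std::uint64_t Wide = TSFlags;  // new code: every bit survives

  std::cout << std::boolalpha
            << "narrow copy sees the flag: " << ((Narrow & HighFlag) != 0) << '\n'
            << "wide copy sees the flag:   " << ((Wide & HighFlag) != 0) << '\n';
  return 0;
}
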
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 10b516aa133a..d47d989b34c0 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1203,11 +1203,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
- SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there isn't really any debug info here
DebugLoc dl = GSDN->getDebugLoc();
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
@@ -1631,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable, false);
+ isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -1700,8 +1700,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
FuncInfo->setVarArgsStackOffset(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset(),
- true, false));
+ CCInfo.getNextStackOffset(), true));
FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -1911,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -1936,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
@@ -2062,7 +2061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
CurArgOffset + (ArgSize - ObjSize),
- isImmutable, false);
+ isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
false, false, 0);
@@ -2097,7 +2096,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true, false));
+ Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -2137,6 +2136,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
unsigned CC,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
unsigned &nAltivecParamsAtEnd) {
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
@@ -2153,9 +2153,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// 16-byte aligned.
nAltivecParamsAtEnd = 0;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Arg.getValueType();
+ EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
@@ -2314,8 +2314,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- NewRetAddrLoc,
- true, false);
+ NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
@@ -2328,7 +2327,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewFPLoc =
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
- true, false);
+ true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
PseudoSourceValue::getFixedStack(NewFPIdx), 0,
@@ -2346,7 +2345,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false);
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
@@ -2472,7 +2471,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType());
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
@@ -2705,6 +2705,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2714,11 +2715,11 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, Ins,
+ isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
} else {
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, Ins,
+ isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
}
}
@@ -2728,6 +2729,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2737,7 +2739,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned PtrByteSize = 4;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2769,7 +2770,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- EVT ArgVT = Outs[i].Val.getValueType();
+ EVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool Result;
@@ -2838,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
i != e;
++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isByVal()) {
@@ -2934,6 +2935,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2961,7 +2963,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// prereserved space for [SP][CR][LR][3 x unused].
unsigned NumBytes =
CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs,
+ Outs, OutVals,
nAltivecParamsAtEnd);
// Calculate by how many bytes the stack has to be adjusted in case of tail
@@ -3025,7 +3027,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// PtrOff will be used to store the current argument to the stack if a
@@ -3051,7 +3053,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Everything else is passed left-justified.
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg,
NULL, 0, VT, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3228,8 +3230,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
ArgOffset = ((ArgOffset+15)/16)*16;
ArgOffset += 12*16;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
- EVT ArgType = Arg.getValueType();
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
if (++j > NumVRs) {
@@ -3297,6 +3299,7 @@ SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -3318,7 +3321,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
Flag = Chain.getValue(1);
}
@@ -3376,8 +3379,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// Find out what the fix offset of the frame pointer save area.
int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
- true, false);
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -3403,8 +3405,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
- true, false);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -4518,7 +4519,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
@@ -4583,7 +4587,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
@@ -4716,23 +4723,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
- BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -4745,7 +4751,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
}
@@ -4831,7 +4838,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, loop2MBB);
F->insert(It, midMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
@@ -4899,7 +4909,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, loop2MBB);
F->insert(It, midMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
@@ -5025,7 +5038,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
llvm_unreachable("Unexpected instr type to insert");
}
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -5042,19 +5055,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case PPCISD::SHL:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->getZExtValue() == 0) // 0 << V -> 0.
+ if (C->isNullValue()) // 0 << V -> 0.
return N->getOperand(0);
}
break;
case PPCISD::SRL:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->getZExtValue() == 0) // 0 >>u V -> 0.
+ if (C->isNullValue()) // 0 >>u V -> 0.
return N->getOperand(0);
}
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->getZExtValue() == 0 || // 0 >>s V -> 0.
+ if (C->isNullValue() || // 0 >>s V -> 0.
C->isAllOnesValue()) // -1 >>s V -> -1.
return N->getOperand(0);
}
@@ -5380,11 +5393,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true
-/// it means one of the asm constraint of the inline asm instruction being
-/// processed is 'm'.
+/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0,0);
@@ -5443,7 +5453,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
}
// Handle standard constraint letters.
- TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
}
// isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 6dcaf1e1a2c7..700816f5a129 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -318,12 +318,9 @@ namespace llvm {
unsigned getByValTypeAlignment(const Type *Ty) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
- /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
- /// true it means one of the asm constraint of the inline asm instruction
- /// being processed is 'm'.
+ /// vector. If it is invalid, don't add anything to Ops.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -438,6 +435,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -446,6 +444,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue
@@ -465,6 +464,7 @@ namespace llvm {
LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -472,6 +472,7 @@ namespace llvm {
LowerCall_SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1b7a7783b23c..1574aa3fb23a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -316,9 +316,8 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -327,50 +326,46 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
- BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
else // Conditional branch
- BuildMI(&MBB, dl, get(PPC::BCC))
+ BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, dl, get(PPC::BCC))
+ BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
-bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == PPC::GPRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::G8RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::F4RCRegisterClass ||
- DestRC == PPC::F8RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg);
- } else if (DestRC == PPC::CRRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg);
- } else if (DestRC == PPC::VRRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::CRBITRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else {
- // Attempt to copy register that is not GPR or FPR
- return false;
- }
-
- return true;
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const TargetInstrDesc &TID = get(Opc);
+ if (TID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, TID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
bool
@@ -654,121 +649,6 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
return &*MIB;
}
-/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into
-/// copy instructions, turning them into load/store instructions.
-MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
- if (Ops.size() != 1) return NULL;
-
- // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
- // it takes more than one instruction to store it.
- unsigned Opc = MI->getOpcode();
- unsigned OpNum = Ops[0];
-
- MachineInstr *NewMI = NULL;
- if ((Opc == PPC::OR &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- } else if ((Opc == PPC::OR8 &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) {
- // The register may be F4RC or F8RC, and that determines the memory op.
- unsigned OrigReg = MI->getOperand(OpNum).getReg();
- // We cannot tell the register class from a physreg alone.
- if (TargetRegisterInfo::isPhysicalRegister(OrigReg))
- return NULL;
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg);
- const bool is64 = RC == PPC::F8RCRegisterClass;
-
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
- get(is64 ? PPC::STFD : PPC::STFS))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
- get(is64 ? PPC::LFD : PPC::LFS))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- }
-
- return NewMI;
-}
-
-bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
- // it takes more than one instruction to store it.
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == PPC::OR &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
- return true;
- else if ((Opc == PPC::OR8 &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
- return true;
- else if (Opc == PPC::FMR || Opc == PPC::FMRSD)
- return true;
-
- return false;
-}
-
-
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7a9e11bd6676..eadb21e21702 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -109,13 +109,12 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -135,23 +134,6 @@ public:
const MDNode *MDPtr,
DebugLoc DL) const;
- /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into
- /// copy instructions, turning them into load/store instructions.
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 0ff852cf15a1..4d6132a9ec50 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -269,140 +269,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- // 32-bit Darwin calling convention.
- static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = {
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- &PPC::GPRCRegClass, 0
- };
-
- // 32-bit SVR4 calling convention.
- static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = {
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRSAVERCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- 0
- };
-
- // 64-bit Darwin calling convention.
- static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = {
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- &PPC::G8RCRegClass, 0
- };
-
- // 64-bit SVR4 calling convention.
- static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = {
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRSAVERCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- 0
- };
-
- if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses :
- Darwin32_CalleeSavedRegClasses;
-
- return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses
- : SVR4_CalleeSavedRegClasses;
-}
-
// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
@@ -1060,8 +926,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
- true, false);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -1069,8 +934,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
- MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta,
- true, false);
+ MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
@@ -1127,9 +991,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = CSI[i].getRegClass();
-
- if (RC == PPC::GPRCRegisterClass) {
+ if (PPC::GPRCRegisterClass->contains(Reg)) {
HasGPSaveArea = true;
GPRegs.push_back(CSI[i]);
@@ -1137,7 +999,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinGPR) {
MinGPR = Reg;
}
- } else if (RC == PPC::G8RCRegisterClass) {
+ } else if (PPC::G8RCRegisterClass->contains(Reg)) {
HasG8SaveArea = true;
G8Regs.push_back(CSI[i]);
@@ -1145,7 +1007,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinG8R) {
MinG8R = Reg;
}
- } else if (RC == PPC::F8RCRegisterClass) {
+ } else if (PPC::F8RCRegisterClass->contains(Reg)) {
HasFPSaveArea = true;
FPRegs.push_back(CSI[i]);
@@ -1154,12 +1016,12 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
MinFPR = Reg;
}
// FIXME SVR4: Disable CR save area for now.
- } else if ( RC == PPC::CRBITRCRegisterClass
- || RC == PPC::CRRCRegisterClass) {
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+ || PPC::CRRCRegisterClass->contains(Reg)) {
// HasCRSaveArea = true;
- } else if (RC == PPC::VRSAVERCRegisterClass) {
+ } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
HasVRSAVESaveArea = true;
- } else if (RC == PPC::VRRCRegisterClass) {
+ } else if (PPC::VRRCRegisterClass->contains(Reg)) {
HasVRSaveArea = true;
VRegs.push_back(CSI[i]);
@@ -1240,9 +1102,10 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// which have the CR/CRBIT register class?
// Adjust the frame index of the CR spill slot.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- const TargetRegisterClass *RC = CSI[i].getRegClass();
+ unsigned Reg = CSI[i].getReg();
- if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) {
+ if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+ PPC::CRRCRegisterClass->contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -1257,9 +1120,9 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// which have the VRSAVE register class?
// Adjust the frame index of the VRSAVE spill slot.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- const TargetRegisterClass *RC = CSI[i].getRegClass();
+ unsigned Reg = CSI[i].getReg();
- if (RC == PPC::VRSAVERCRegisterClass) {
+ if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -1762,4 +1625,3 @@ int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
}
#include "PPCGenRegisterInfo.inc"
-
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 43cf53546bdb..f026847a540b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -42,9 +42,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
/// targetHandlesStackFrameRounding - Returns true if the target is
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 7fa73edaefee..4d7ee08de1de 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -300,6 +300,14 @@ unsigned long reverse(unsigned v) {
return v ^ (t >> 8);
}
+Neither is this (very standard idiom):
+
+int f(int n)
+{
+ return (((n) << 24) | (((n) & 0xff00) << 8)
+ | (((n) >> 8) & 0xff00) | ((n) >> 24));
+}
+
//===---------------------------------------------------------------------===//
[LOOP RECOGNITION]
@@ -898,17 +906,6 @@ The expression should optimize to something like
//===---------------------------------------------------------------------===//
-From GCC Bug 3756:
-int
-pn (int n)
-{
- return (n >= 0 ? 1 : -1);
-}
-Should combine to (n >> 31) | 1. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts | llc".
-
-//===---------------------------------------------------------------------===//
-
void a(int variable)
{
if (variable == 4 || variable == 6)
@@ -1439,33 +1436,6 @@ This pattern repeats several times, basically doing:
//===---------------------------------------------------------------------===//
-186.crafty contains this interesting pattern:
-
-%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0),
- i8* %30)
-%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0)
-br i1 %phitmp648, label %bb70, label %bb76
-
-bb70: ; preds = %OptionMatch.exit91, %bb69
- %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; <i32> [#uses=1]
-
-This is basically:
- cststr = "abcdef";
- if (strstr(cststr, P) == cststr) {
- x = strlen(P);
- ...
-
-The strstr call would be significantly cheaper written as:
-
-cststr = "abcdef";
-if (memcmp(P, str, strlen(P)))
- x = strlen(P);
-
-This is memcmp+strlen instead of strstr. This also makes the strlen fully
-redundant.
-
-//===---------------------------------------------------------------------===//
-
186.crafty also contains this code:
%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
@@ -1863,3 +1833,91 @@ LLVM prefers comparisons with zero over non-zero in general, but in this
case it choses instead to keep the max operation obvious.
//===---------------------------------------------------------------------===//
+
+Take the following testcase on x86-64 (similar testcases exist for all targets
+with addc/adde):
+
+define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
+i64 %c) nounwind {
+entry:
+ %0 = zext i64 %a to i128 ; <i128> [#uses=1]
+ %1 = zext i64 %b to i128 ; <i128> [#uses=1]
+ %2 = add i128 %1, %0 ; <i128> [#uses=2]
+ %3 = zext i64 %c to i128 ; <i128> [#uses=1]
+ %4 = shl i128 %3, 64 ; <i128> [#uses=1]
+ %5 = add i128 %4, %2 ; <i128> [#uses=1]
+ %6 = lshr i128 %5, 64 ; <i128> [#uses=1]
+ %7 = trunc i128 %6 to i64 ; <i64> [#uses=1]
+ store i64 %7, i64* %s, align 8
+ %8 = trunc i128 %2 to i64 ; <i64> [#uses=1]
+ store i64 %8, i64* %t, align 8
+ ret void
+}
+
+Generated code:
+ addq %rcx, %rdx
+ movl $0, %eax
+ adcq $0, %rax
+ addq %r8, %rax
+ movq %rax, (%rdi)
+ movq %rdx, (%rsi)
+ ret
+
+Expected code:
+ addq %rcx, %rdx
+ adcq $0, %r8
+ movq %r8, (%rdi)
+ movq %rdx, (%rsi)
+ ret
+
+The generated SelectionDAG has an ADD of an ADDE, where both operands of the
+ADDE are zero. Replacing one of the operands of the ADDE with the other operand
+of the ADD, and replacing the ADD with the ADDE, should give the desired result.
+
+(That said, we are doing a lot better than gcc on this testcase. :) )
+
+//===---------------------------------------------------------------------===//
+
+Switch lowering generates less than ideal code for the following switch:
+define void @a(i32 %x) nounwind {
+entry:
+ switch i32 %x, label %if.end [
+ i32 0, label %if.then
+ i32 1, label %if.then
+ i32 2, label %if.then
+ i32 3, label %if.then
+ i32 5, label %if.then
+ ]
+if.then:
+ tail call void @foo() nounwind
+ ret void
+if.end:
+ ret void
+}
+declare void @foo()
+
+Generated code on x86-64 (other platforms give similar results):
+a:
+ cmpl $5, %edi
+ ja .LBB0_2
+ movl %edi, %eax
+ movl $47, %ecx
+ btq %rax, %rcx
+ jb .LBB0_3
+.LBB0_2:
+ ret
+.LBB0_3:
+ jmp foo # TAILCALL
+
+The movl+movl+btq+jb could be simplified to a cmpl+jne.
+
+Or, if we wanted to be really clever, we could simplify the whole thing to
+something like the following, which eliminates a branch:
+ xorl $1, %edi
+ cmpl $4, %edi
+ ja .LBB0_2
+ ret
+.LBB0_2:
+ jmp foo # TAILCALL
+
+//===---------------------------------------------------------------------===//
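
Aside (illustration, not part of the patch): a quick self-contained check of the arithmetic behind the new switch-lowering entry above. The mask 47 is 0b101111, so the btq sequence fires exactly for the handled cases {0,1,2,3,5}; xor-ing the selector with 1 maps that set onto the contiguous range 0..4 (and maps the hole, 4, to 5), which is what lets one unsigned compare against 4 replace the bit test. Note that after the xor the handled cases are the values that do not exceed 4, so the tail call to foo belongs on the not-above side of that compare.

#include <cstdio>

// The switch in the entry handles exactly these selector values.
static bool handledBySwitch(unsigned x) {
  return x == 0 || x == 1 || x == 2 || x == 3 || x == 5;
}

int main() {
  bool ok = true;
  for (unsigned x = 0; x < 16; ++x) {
    bool bitTest  = (x <= 5) && (((47u >> x) & 1u) != 0);  // cmpl $5 + btq $47
    bool xorTrick = ((x ^ 1u) <= 4u);                      // xorl $1 + cmpl $4
    ok = ok && bitTest == handledBySwitch(x) && xorTrick == handledBySwitch(x);
  }
  std::puts(ok ? "both forms select exactly {0,1,2,3,5}" : "mismatch");
  return ok ? 0 : 1;
}
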
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index f47e53acbfc1..4099a628773f 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -38,6 +38,7 @@ SDValue
SparcTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
@@ -66,7 +67,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -133,7 +134,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(Arg);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load;
if (ObjectVT == MVT::i32) {
@@ -146,7 +147,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
DAG.getConstant(Offset, MVT::i32));
- Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr,
NULL, 0, ObjectVT, false, false, 0);
Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
}
@@ -169,7 +170,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(Arg);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
false, false, 0);
@@ -192,7 +193,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
false, false, 0);
@@ -205,7 +206,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
false, false, 0);
@@ -239,7 +240,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
@@ -262,6 +263,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -283,7 +285,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Count the size of the outgoing arguments.
unsigned ArgsSize = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) {
+ switch (Outs[i].VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown value type!");
case MVT::i1:
case MVT::i8:
@@ -316,7 +318,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -358,8 +360,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
unsigned ArgOffset = 68;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- SDValue Val = Outs[i].Val;
- EVT ObjectVT = Val.getValueType();
+ SDValue Val = OutVals[i];
+ EVT ObjectVT = Outs[i].VT;
SDValue ValToStore(0, 0);
unsigned ObjSize;
switch (ObjectVT.getSimpleVT().SimpleTy) {
@@ -478,7 +480,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
@@ -737,7 +739,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
CC == ISD::SETNE &&
((LHS.getOpcode() == SPISD::SELECT_ICC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
@@ -745,8 +747,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
isa<ConstantSDNode>(LHS.getOperand(0)) &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
@@ -759,7 +761,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
@@ -1007,21 +1009,20 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
// copy0MBB:
// %FalseValue = ...
@@ -1035,11 +1036,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
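
The EmitInstrWithCustomInserter change above follows the same pattern the
other targets in this patch move to; condensed into one place (a sketch with
abbreviated names -- DstReg/TrueReg/FalseReg stand for the pseudo's operands
-- not a verbatim copy of the file), the new expansion is:

  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);

  // Everything after the SELECT pseudo, plus the successor edges, moves to
  // sinkMBB; transferSuccessorsAndUpdatePHIs also rewrites PHIs that
  // referred to the old block.
  sinkMBB->splice(sinkMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
  F->insert(It, copy0MBB);
  F->insert(It, sinkMBB);
  // (copy0MBB's false-value code and its fallthrough edge to sinkMBB are
  // unchanged context.)

  // The merge PHI must sit at the very top of sinkMBB, and the pseudo is
  // erased in place rather than via F->DeleteMachineInstr.
  BuildMI(*sinkMBB, sinkMBB->begin(), dl, TII.get(SP::PHI), DstReg)
    .addReg(FalseReg).addMBB(copy0MBB)
    .addReg(TrueReg).addMBB(BB);
  MI->eraseFromParent();
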
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 5ebdcacba57d..db39e083a836 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -86,6 +86,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -94,6 +95,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 8e49ecac4dfb..3a4c80ad076a 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -109,38 +109,29 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned
SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond)const{
- // FIXME this should probably take a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL)const{
// Can only insert uncond branches so far.
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
return 1;
}
-bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == SP::IntRegsRegisterClass)
- BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
- else if (DestRC == SP::FPRegsRegisterClass)
- BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg);
- else if (DestRC == SP::DFPRegsRegisterClass)
- BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD),DestReg)
- .addReg(SrcReg);
+void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
else
- // Can't copy this register
- return false;
-
- return true;
+ llvm_unreachable("Impossible reg-to-reg copy");
}
void SparcInstrInfo::
@@ -183,61 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Can't load this register from stack slot");
}
-MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- bool isFloat = false;
- MachineInstr *NewMI = NULL;
- switch (MI->getOpcode()) {
- case SP::ORrr:
- if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0&&
- MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) {
- if (OpNum == 0) // COPY -> STORE
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(MI->getOperand(2).getReg());
- else // COPY -> LOAD
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri),
- MI->getOperand(0).getReg())
- .addFrameIndex(FI)
- .addImm(0);
- }
- break;
- case SP::FMOVS:
- isFloat = true;
- // FALLTHROUGH
- case SP::FMOVD:
- if (OpNum == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(),
- get(isFloat ? SP::STFri : SP::STDFri))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(),
- get(isFloat ? SP::LDFri : SP::LDDFri))
- .addReg(DstReg, RegState::Define |
- getDeadRegState(isDead) | getUndefRegState(isUndef))
- .addFrameIndex(FI)
- .addImm(0);
- }
- break;
- }
-
- return NewMI;
-}
-
unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
{
SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index a00ba3939fde..133471857bad 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -68,14 +68,13 @@ public:
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -89,18 +88,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 9489580e900c..ddadd51a93a4 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -665,7 +665,7 @@ let Defs = [FCC] in {
//===----------------------------------------------------------------------===//
// V9 Conditional Moves.
-let Predicates = [HasV9], isTwoAddress = 1 in {
+let Predicates = [HasV9], Constraints = "$T = $dst" in {
// Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
// FIXME: Add instruction encodings for the JIT some day.
def MOVICCrr
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 08373bb83869..427cc7fd4577 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -52,13 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-
-const TargetRegisterClass* const*
-SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
- return CalleeSavedRegClasses;
-}
-
bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
return false;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 24d43e3049cc..9f0cda707b3e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -32,9 +32,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index 90be2226b78d..d7ac8f50b69e 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -124,7 +124,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
if (strncmp(Modifier + 7, "even", 4) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32);
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
else if (strncmp(Modifier + 7, "odd", 3) == 0)
Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
else
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index bb2952a986dd..ed290ca7ed95 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -670,7 +670,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_even32 : SystemZ::subreg_even);
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -754,7 +754,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_even32 : SystemZ::subreg_even);
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 76f29010d08d..67f739f690dd 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -254,6 +254,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -266,7 +267,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::Fast:
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
}
}
@@ -334,7 +335,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the nodes corresponding to a load from this parameter slot.
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true, false);
+ VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
@@ -372,6 +373,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -402,7 +404,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -464,7 +466,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
@@ -550,6 +552,7 @@ SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location
@@ -575,7 +578,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
- SDValue ResValue = Outs[i].Val;
+ SDValue ResValue = OutVals[i];
assert(VA.isRegLoc() && "Can only return in registers!");
// If this is an 8/16/32-bit value, it really should be passed promoted
@@ -729,14 +732,14 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue Result;
if (!IsPic && !ExtraLoadRequired) {
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
Offset = 0;
} else {
unsigned char OpFlags = 0;
if (ExtraLoadRequired)
OpFlags = SystemZII::MO_GOTENT;
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
@@ -827,16 +830,20 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
- BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
- copy1MBB->transferSuccessors(BB);
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(copy1MBB);
+ BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to copy1MBB
@@ -849,11 +856,11 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = copy1MBB;
- BuildMI(BB, dl, TII.get(SystemZ::PHI),
+ BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 94bd90617517..51d2df3a3008 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -98,6 +98,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -126,6 +127,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -134,6 +136,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
const SystemZSubtarget &Subtarget;
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 8c5e9058561a..a65828061d3b 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -126,7 +126,7 @@ def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
(implicit PSW)]>;
}
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Defs = [PSW] in {
let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
@@ -237,7 +237,7 @@ def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
"ddb\t{$dst, $src2}",
[(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
-} // isTwoAddress = 1
+} // Constraints = "$src1 = $dst"
def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
"sqebr\t{$dst, $src}",
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 043686cfd49d..c03864fe41e4 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -117,59 +117,28 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}
-bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- // Determine if DstRC and SrcRC have a common superclass.
- const TargetRegisterClass *CommonRC = DestRC;
- if (DestRC == SrcRC)
- /* Same regclass for source and dest */;
- else if (CommonRC->hasSuperClass(SrcRC))
- CommonRC = SrcRC;
- else if (!CommonRC->hasSubClass(SrcRC))
- CommonRC = 0;
-
- if (CommonRC) {
- if (CommonRC == &SystemZ::GR64RegClass ||
- CommonRC == &SystemZ::ADDR64RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::GR32RegClass ||
- CommonRC == &SystemZ::ADDR32RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::GR64PRegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::GR128RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::FP32RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::FP64RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg);
- } else {
- return false;
- }
-
- return true;
- }
-
- if ((SrcRC == &SystemZ::GR64RegClass &&
- DestRC == &SystemZ::ADDR64RegClass) ||
- (DestRC == &SystemZ::GR64RegClass &&
- SrcRC == &SystemZ::ADDR64RegClass)) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg);
- return true;
- } else if ((SrcRC == &SystemZ::GR32RegClass &&
- DestRC == &SystemZ::ADDR32RegClass) ||
- (DestRC == &SystemZ::GR32RegClass &&
- SrcRC == &SystemZ::ADDR32RegClass)) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg);
- return true;
- }
-
- return false;
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rr;
+ else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV32rr;
+ else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rrP;
+ else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV128rr;
+ else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV32rr;
+ else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV64rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool
@@ -286,8 +255,7 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &SystemZ::FP64RegClass) {
+ if (!SystemZ::FP64RegClass.contains(Reg)) {
unsigned Offset = RegSpillOffsets[Reg];
CalleeFrameSize += 8;
if (StartOffset > Offset) {
@@ -332,11 +300,10 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Save FPRs
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass == &SystemZ::FP64RegClass) {
+ if (SystemZ::FP64RegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass,
- &RI);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, &RI);
}
}
@@ -361,9 +328,9 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// Restore FP registers
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass == &SystemZ::FP64RegClass)
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
+ if (SystemZ::FP64RegClass.contains(Reg))
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, &RI);
}
// Restore GP registers
@@ -523,9 +490,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME: this should probably have a DebugLoc operand
- DebugLoc DL;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index a753f14c0b82..0559619248a6 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -60,11 +60,10 @@ public:
///
virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
- bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -102,7 +101,8 @@ public:
bool AllowModify) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 22bde4ee7df2..8df07c034385 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -478,7 +478,8 @@ def MOV64rmm : RSYI<0x04EB,
"lmg\t{$from, $to, $dst}",
[]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ Constraints = "$src = $dst" in {
def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
"lhi\t${dst:subreg_even}, 0",
[]>;
@@ -537,7 +538,7 @@ def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
(implicit PSW)]>;
}
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Defs = [PSW] in {
@@ -924,12 +925,12 @@ def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2
"dlg\t{$dst, $src2}",
[]>;
} // mayLoad
-} // isTwoAddress = 1
+} // Constraints = "$src1 = $dst"
//===----------------------------------------------------------------------===//
// Shifts
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def SRL32rri : RSI<0x88,
(outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
"srl\t{$src, $amt}",
@@ -939,7 +940,7 @@ def SRL64rri : RSYI<0xEB0C,
"srlg\t{$dst, $src, $amt}",
[(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def SHL32rri : RSI<0x89,
(outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
"sll\t{$src, $amt}",
@@ -950,7 +951,7 @@ def SHL64rri : RSYI<0xEB0D,
[(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
let Defs = [PSW] in {
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def SRA32rri : RSI<0x8A,
(outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
"sra\t{$src, $amt}",
@@ -1129,13 +1130,13 @@ def : Pat<(mulhs GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd32),
GR32:$src2),
- subreg_even32)>;
+ subreg_32bit)>;
def : Pat<(mulhu GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd32),
GR32:$src2),
- subreg_even32)>;
+ subreg_32bit)>;
def : Pat<(mulhu GR64:$src1, GR64:$src2),
(EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
GR64:$src1, subreg_odd),
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 638fd17c9953..ae96b0b08ff6 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -47,22 +47,6 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0
- };
- return CalleeSavedRegClasses;
-}
-
BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
if (hasFP(MF))
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 42aa5dddb4ab..670025f86e08 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -32,9 +32,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasReservedCallFrame(MachineFunction &MF) const { return true; }
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index b561744d1561..33be8ddffbed 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -55,7 +55,6 @@ class FPRL<bits<4> num, string n, list<Register> subregs>
let Namespace = "SystemZ" in {
def subreg_32bit : SubRegIndex;
-def subreg_even32 : SubRegIndex;
def subreg_odd32 : SubRegIndex;
def subreg_even : SubRegIndex;
def subreg_odd : SubRegIndex;
@@ -99,7 +98,7 @@ def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
}
// Register pairs
-let SubRegIndices = [subreg_even32, subreg_odd32] in {
+let SubRegIndices = [subreg_32bit, subreg_odd32] in {
def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>;
def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>;
def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>;
@@ -111,8 +110,7 @@ def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>;
}
let SubRegIndices = [subreg_even, subreg_odd],
- CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit),
- (subreg_odd32 subreg_odd, subreg_32bit)] in {
+ CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>;
def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>;
def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>;
@@ -355,7 +353,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
[R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
{
- let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)];
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -391,7 +389,7 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
[R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
{
- let SubRegClasses = [(GR32 subreg_even32, subreg_odd32),
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
(GR64 subreg_even, subreg_odd)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 094a57edb419..c099a7eaefe7 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -28,6 +28,10 @@ const TargetRegisterClass *
TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const {
if (isLookupPtrRegClass())
return TRI->getPointerRegClass(RegClass);
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+ // Otherwise just look it up normally.
return TRI->getRegClass(RegClass);
}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index b9372d04bb17..dd7b532bbfba 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -101,7 +101,7 @@ static bool IsNullTerminatedString(const Constant *C) {
ConstantInt *Null =
dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
- if (Null == 0 || Null->getZExtValue() != 0)
+ if (Null == 0 || !Null->isZero())
return false; // Not null terminated.
// Verify that the null doesn't occur anywhere else in the string.
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index dcc5f610e6d4..49bfad54136d 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -39,20 +39,20 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
TargetRegisterInfo::~TargetRegisterInfo() {}
-/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
-/// register of the given type. If type is EVT::Other, then just return any
-/// register class the register belongs to.
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the smallest register class of
+/// the right type that contains this physreg.
const TargetRegisterClass *
-TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
assert(isPhysicalRegister(reg) && "reg must be a physical register");
- // Pick the most super register class of the right type that contains
+ // Pick the most sub register class of the right type that contains
// this physreg.
const TargetRegisterClass* BestRC = 0;
for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
const TargetRegisterClass* RC = *I;
if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || BestRC->hasSuperClass(RC)))
+ (!BestRC || BestRC->hasSubClass(RC)))
BestRC = RC;
}
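
Callers see the rename plus the tightened semantics: where the old
getPhysicalRegisterRegClass returned the largest matching class, the new
query returns the smallest one containing the register. An illustrative
call (the register and resulting class are hypothetical examples, not taken
from this patch):

  // Smallest register class of type i32 that contains Reg; for an x86 GPR
  // this may now be a constrained subclass such as GR32_ABCD rather than GR32.
  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, MVT::i32);
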
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index a58f58e03325..26797ab353b6 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -33,13 +33,11 @@ class X86AsmLexer : public TargetAsmLexer {
}
const AsmToken &lexDefinite() {
- if(tentativeIsValid) {
+ if (tentativeIsValid) {
tentativeIsValid = false;
return tentativeToken;
}
- else {
- return getLexer()->Lex();
- }
+ return getLexer()->Lex();
}
AsmToken LexTokenATT();
@@ -72,38 +70,65 @@ public:
static unsigned MatchRegisterName(StringRef Name);
AsmToken X86AsmLexer::LexTokenATT() {
- const AsmToken lexedToken = lexDefinite();
+ AsmToken lexedToken = lexDefinite();
switch (lexedToken.getKind()) {
default:
- return AsmToken(lexedToken);
+ return lexedToken;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Percent:
- {
+ return lexedToken;
+
+ case AsmToken::Percent: {
const AsmToken &nextToken = lexTentative();
- if (nextToken.getKind() == AsmToken::Identifier) {
- unsigned regID = MatchRegisterName(nextToken.getString());
+ if (nextToken.getKind() != AsmToken::Identifier)
+ return lexedToken;
+
- if (regID) {
- lexDefinite();
+ if (unsigned regID = MatchRegisterName(nextToken.getString())) {
+ lexDefinite();
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(regID));
+ }
+
+ // Match register name failed. If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (nextToken.getString().size() == 3 &&
+ nextToken.getString().startswith("db")) {
+ int RegNo = -1;
+ switch (nextToken.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != -1) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
StringRef regStr(lexedToken.getString().data(),
lexedToken.getString().size() +
nextToken.getString().size());
-
- return AsmToken(AsmToken::Register,
- regStr,
- static_cast<int64_t>(regID));
- }
- else {
- return AsmToken(lexedToken);
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(RegNo));
}
}
- else {
- return AsmToken(lexedToken);
- }
+
+
+ return lexedToken;
}
}
}
@@ -113,26 +138,22 @@ AsmToken X86AsmLexer::LexTokenIntel() {
switch(lexedToken.getKind()) {
default:
- return AsmToken(lexedToken);
+ return lexedToken;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Identifier:
- {
+ return lexedToken;
+ case AsmToken::Identifier: {
std::string upperCase = lexedToken.getString().str();
std::string lowerCase = LowercaseString(upperCase);
StringRef lowerRef(lowerCase);
unsigned regID = MatchRegisterName(lowerRef);
- if (regID) {
+ if (regID)
return AsmToken(AsmToken::Register,
lexedToken.getString(),
static_cast<int64_t>(regID));
- }
- else {
- return AsmToken(lexedToken);
- }
+ return lexedToken;
}
}
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 40a6a7b56169..a856e9cc7a4e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -412,6 +412,28 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
return false;
}
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
if (RegNo == 0)
return Error(Tok.getLoc(), "invalid register name");
@@ -597,6 +619,16 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
}
+ // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
+ // the form jrcxz is not allowed in 32-bit mode.
+ if (Is64Bit) {
+ if (Name == "jcxz")
+ return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
+ } else {
+ if (Name == "jrcxz")
+ return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
+ }
+
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
// represent alternative syntaxes in the .td file, without requiring
// instruction duplication.
@@ -617,6 +649,23 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("setnz", "setne")
.Case("jz", "je")
.Case("jnz", "jne")
+ .Case("jc", "jb")
+ // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
+ // jecxz requires an AdSize prefix but jecxz does not have a prefix in
+ // 32-bit mode.
+ .Case("jecxz", "jcxz")
+ .Case("jrcxz", "jcxz")
+ .Case("jna", "jbe")
+ .Case("jnae", "jb")
+ .Case("jnb", "jae")
+ .Case("jnbe", "ja")
+ .Case("jnc", "jae")
+ .Case("jng", "jle")
+ .Case("jnge", "jl")
+ .Case("jnl", "jge")
+ .Case("jnle", "jg")
+ .Case("jpe", "jp")
+ .Case("jpo", "jnp")
.Case("cmovcl", "cmovbl")
.Case("cmovcl", "cmovbl")
.Case("cmovnal", "cmovbel")
@@ -631,36 +680,64 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("cmovnlel", "cmovgl")
.Case("cmovnzl", "cmovnel")
.Case("cmovzl", "cmovel")
+ .Case("fwait", "wait")
+ .Case("movzx", "movzb")
.Default(Name);
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
- if (PatchedName.startswith("cmp") &&
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName.startswith("vcmp");
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
- PatchedName.slice(3, PatchedName.size() - 2))
- .Case("eq", 0)
- .Case("lt", 1)
- .Case("le", 2)
- .Case("unord", 3)
- .Case("neq", 4)
- .Case("nlt", 5)
- .Case("nle", 6)
- .Case("ord", 7)
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Case("eq_uq", 8)
+ .Case("nge", 9)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
.Default(~0U);
if (SSEComparisonCode != ~0U) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
- PatchedName = "cmpss";
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
} else if (PatchedName.endswith("sd")) {
- PatchedName = "cmpsd";
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
} else if (PatchedName.endswith("ps")) {
- PatchedName = "cmpps";
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
} else {
assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
- PatchedName = "cmppd";
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
}
}
}
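
Taken together, the parser changes above widen what AT&T-style input is
accepted and how it is canonicalised before matching. A hypothetical input
fragment (64-bit mode) that now goes through:

  mov      %db3, %rax       # db3 is accepted as an alias for the debug reg %dr3
  jnc      1f               # condition-code aliases: jnc -> jae, jpo -> jnp, ...
  cmpneqps %xmm1, %xmm0     # cmp<cc>{ss,sd,ps,pd} folds <cc> into an immediate (neq = 4)
1:
  ret
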
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 0b64cb4f2828..f2cdb5ba55eb 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -85,11 +85,18 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
const MCOperand &BaseReg = MI->getOperand(Op);
const MCOperand &IndexReg = MI->getOperand(Op+2);
const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
if (DispSpec.isImm()) {
int64_t DispVal = DispSpec.getImm();
@@ -115,13 +122,3 @@ void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
O << ')';
}
}
-
-void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- // If this has a segment register, print it.
- if (MI->getOperand(Op+4).getReg()) {
- printOperand(MI, Op+4, O);
- O << ':';
- }
- printLeaMemReference(MI, Op, O);
-}
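
With the segment handling folded directly into printMemReference, a memory
operand that carries a segment override prints in the usual AT&T shape
(hypothetical operand values):

  movl %gs:8(%rax,%rcx,4), %edx    # segment:disp(base,index,scale)
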
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 8d5d50832d49..3be4bae5bec2 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -34,7 +34,6 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
- void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
@@ -69,14 +68,8 @@ public:
void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printLeaMemReference(MI, OpNo, O);
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
}
};
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 183213d324b3..73bc603f18f1 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -200,6 +200,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_GOT: O << "@GOT"; break;
case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
case X86II::MO_PLT: O << "@PLT"; break;
+ case X86II::MO_TLVP: O << "@TLVP"; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ O << "@TLVP" << '-';
+ PrintPICBaseSymbol(O);
+ break;
}
}
@@ -383,6 +388,8 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
printSymbolOperand(MO, O);
+ if (Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
return false;
}
if (MO.isReg()) {
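
The new @TLVP modifiers printed above correspond to Darwin's thread-local
access sequence; as an illustration (not taken from this patch), a load of a
thread-local variable _x on x86-64 looks like

  movq  _x@TLVP(%rip), %rdi
  callq *(%rdi)                 # returns the address of _x in %rax

while on 32-bit Darwin the reference is emitted against the PIC base, which
is what the @TLVP-<picbase> form (MO_TLVP_PIC_BASE) is for.
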
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index 7e0a9bb891fa..a632047f6592 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -81,12 +81,19 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
const MCOperand &BaseReg = MI->getOperand(Op);
unsigned ScaleVal = MI->getOperand(Op+1).getImm();
const MCOperand &IndexReg = MI->getOperand(Op+2);
const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
O << '[';
@@ -104,7 +111,7 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
NeedPlus = true;
}
-
+
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
@@ -126,13 +133,3 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
O << ']';
}
-
-void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- // If this has a segment register, print it.
- if (MI->getOperand(Op+4).getReg()) {
- printOperand(MI, Op+4, O);
- O << ':';
- }
- printLeaMemReference(MI, Op, O);
-}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index a0beeb202e72..4d680744dd60 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -36,7 +36,6 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -81,17 +80,9 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
- void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "DWORD PTR ";
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "QWORD PTR ";
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "QWORD PTR ";
- printLeaMemReference(MI, OpNo, O);
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
}
};
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 4edeca979872..09f150bb7946 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -152,6 +152,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_STUB:
break;
+ case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr
+ = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+ Ctx),
+ Ctx);
+
+ break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
@@ -266,10 +277,21 @@ static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
return;
// Check whether this is an absolute address.
- if (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
- Inst.getOperand(AddrBase + 2).getReg() != 0 ||
- Inst.getOperand(AddrBase + 4).getReg() != 0 ||
- Inst.getOperand(AddrBase + 1).getImm() != 1)
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (Inst.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+
+ if (Absolute &&
+ (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1))
return;
// If so, rewrite the instruction.
@@ -327,6 +349,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
switch (OutMI.getOpcode()) {
case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
lower_lea64_32mem(&OutMI, 1);
+ // FALL THROUGH.
+ case X86::LEA64r:
+ case X86::LEA16r:
+ case X86::LEA32r:
+ // LEA should have a segment register, but it must be empty.
+ assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+ "Unexpected # of LEA operands");
+ assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+ "LEA has segment specified!");
break;
case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
@@ -364,10 +395,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
// register inputs modeled as normal uses instead of implicit uses. As such,
// truncate off all but the first operand (the callee). FIXME: Change isel.
- case X86::TAILJMPr:
case X86::TAILJMPr64:
case X86::CALL64r:
case X86::CALL64pcrel32: {
@@ -380,11 +410,20 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64: {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: assert(0 && "Invalid opcode");
+ case X86::TAILJMPr: Opcode = X86::JMP32r; break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
+ }
+
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
- OutMI.setOpcode(X86::TAILJMP_1);
+ OutMI.setOpcode(Opcode);
OutMI.addOperand(Saved);
break;
}
@@ -483,8 +522,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
- O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O);
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
O << ']';
O << "+";
printOperand(MI, NOps-2, O);
@@ -495,8 +538,9 @@ X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
MachineLocation Location;
assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
// Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
return Location;
}
@@ -513,6 +557,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ // Lower these as normal, but add some comments.
+ OutStreamer.AddComment("TAILCALL");
+ break;
+
case X86::MOVPC32r: {
MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
@@ -578,7 +629,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
-
OutStreamer.EmitInstruction(TmpInst);
}
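A rough sketch of what the MO_TLVP_PIC_BASE case above constructs, written as a standalone helper (illustrative only, not part of the patch; it reuses only the MC calls already visible in the hunk):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

// Build "Sym@TLVP - picbase", the PIC-relative reference to a Darwin
// thread-local variable descriptor used on x86-32.
static const MCExpr *buildTLVPRef(MCSymbol *Sym, MCSymbol *PICBase,
                                  MCContext &Ctx) {
  const MCExpr *Ref =
      MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
  return MCBinaryExpr::CreateSub(Ref, MCSymbolRefExpr::Create(PICBase, Ctx),
                                 Ctx);
}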
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 9f91060179e6..97589c00515b 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -4,8 +4,8 @@ add_llvm_library(LLVMX86Disassembler
X86Disassembler.cpp
X86DisassemblerDecoder.c
)
-# workaround for hanging compilation on MSVC9
-if( MSVC_VERSION EQUAL 1500 )
+# workaround for hanging compilation on MSVC9 and 10
+if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
SOURCE X86Disassembler.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 8a5a6308704a..09f1584ce4d9 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -252,13 +252,8 @@ static bool translateRMRegister(MCInst &mcInst,
/// @param mcInst - The MCInst to append to.
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
-/// @param sr - Whether or not to emit the segment register. The
-/// LEA instruction does not expect a segment-register
-/// operand.
/// @return - 0 on success; nonzero otherwise
-static bool translateRMMemory(MCInst &mcInst,
- InternalInstruction &insn,
- bool sr) {
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
// Addresses in an MCInst are represented as five operands:
// 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
@@ -385,10 +380,7 @@ static bool translateRMMemory(MCInst &mcInst,
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
mcInst.addOperand(displacement);
-
- if (sr)
- mcInst.addOperand(segmentReg);
-
+ mcInst.addOperand(segmentReg);
return false;
}
@@ -439,9 +431,8 @@ static bool translateRM(MCInst &mcInst,
case TYPE_M1616:
case TYPE_M1632:
case TYPE_M1664:
- return translateRMMemory(mcInst, insn, true);
case TYPE_LEA:
- return translateRMMemory(mcInst, insn, false);
+ return translateRMMemory(mcInst, insn);
}
}
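For reference, the five memory operands enumerated in the comment above, which are now appended uniformly (segment register included) even for LEA, sit at fixed indices. An illustrative enum; the patch itself uses the X86::Addr* enumerators for these positions:

enum MemOperandIndex {
  kBaseReg    = 0, // R/M base, or the SIB base when a SIB byte is present
  kScaleAmt   = 1, // scale amount: 1, 2, 4 or 8
  kIndexReg   = 2, // index register, 0 when there is none
  kDisp       = 3, // displacement: immediate or relocatable expression
  kSegmentReg = 4, // segment-override register, 0 when there is none
  kNumMemOps  = 5  // matches X86::AddrNumOperands
};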
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index e5f84e8b169e..b6aba93f3738 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -36,62 +36,6 @@ The pattern isel got this one right.
//===---------------------------------------------------------------------===//
-SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction
-like this:
-
- X += y
-
-and the register allocator decides to spill X, it is cheaper to emit this as:
-
-Y += [xslot]
-store Y -> [xslot]
-
-than as:
-
-tmp = [xslot]
-tmp += y
-store tmp -> [xslot]
-
-..and this uses one fewer register (so this should be done at load folding
-time, not at spiller time). *Note* however that this can only be done
-if Y is dead. Here's a testcase:
-
-@.str_3 = external global [15 x i8]
-declare void @printf(i32, ...)
-define void @main() {
-build_tree.exit:
- br label %no_exit.i7
-
-no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
- %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ],
- [ %tmp.34.i18, %no_exit.i7 ]
- %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
- [ %tmp.28.i16, %no_exit.i7 ]
- %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00
- %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00
- br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
-
-Compute_Tree.exit23: ; preds = %no_exit.i7
- tail call void (i32, ...)* @printf( i32 0 )
- store double %tmp.34.i18, double* null
- ret void
-}
-
-We currently emit:
-
-.BBmain_1:
- xorpd %XMM1, %XMM1
- addsd %XMM0, %XMM1
-*** movsd %XMM2, QWORD PTR [%ESP + 8]
-*** addsd %XMM2, %XMM1
-*** movsd QWORD PTR [%ESP + 8], %XMM2
- jmp .BBmain_1 # no_exit.i7
-
-This is a bugpoint reduced testcase, which is why the testcase doesn't make
-much sense (e.g. its an infinite loop). :)
-
-//===---------------------------------------------------------------------===//
-
SSE should implement 'select_cc' using 'emulated conditional moves' that use
pcmp/pand/pandn/por to do a selection instead of a conditional branch:
@@ -122,12 +66,6 @@ LBB_X_2:
//===---------------------------------------------------------------------===//
-It's not clear whether we should use pxor or xorps / xorpd to clear XMM
-registers. The choice may depend on subtarget information. We should do some
-more experiments on different x86 machines.
-
-//===---------------------------------------------------------------------===//
-
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
feasible.
@@ -151,45 +89,6 @@ Perhaps use pxor / xorp* to clear a XMM register first?
//===---------------------------------------------------------------------===//
-How to decide when to use the "floating point version" of logical ops? Here are
-some code fragments:
-
- movaps LCPI5_5, %xmm2
- divps %xmm1, %xmm2
- mulps %xmm2, %xmm3
- mulps 8656(%ecx), %xmm3
- addps 8672(%ecx), %xmm3
- andps LCPI5_6, %xmm2
- andps LCPI5_1, %xmm3
- por %xmm2, %xmm3
- movdqa %xmm3, (%edi)
-
- movaps LCPI5_5, %xmm1
- divps %xmm0, %xmm1
- mulps %xmm1, %xmm3
- mulps 8656(%ecx), %xmm3
- addps 8672(%ecx), %xmm3
- andps LCPI5_6, %xmm1
- andps LCPI5_1, %xmm3
- orps %xmm1, %xmm3
- movaps %xmm3, 112(%esp)
- movaps %xmm3, (%ebx)
-
-Due to some minor source change, the later case ended up using orps and movaps
-instead of por and movdqa. Does it matter?
-
-//===---------------------------------------------------------------------===//
-
-X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
-to choose between movaps, movapd, and movdqa based on types of source and
-destination?
-
-How about andps, andpd, and pand? Do we really care about the type of the packed
-elements? If not, why not always use the "ps" variants which are likely to be
-shorter.
-
-//===---------------------------------------------------------------------===//
-
External test Nurbs exposed some problems. Look for
__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
emits:
@@ -278,41 +177,6 @@ It also exposes some other problems. See MOV32ri -3 and the spills.
//===---------------------------------------------------------------------===//
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500
-
-LLVM is producing bad code.
-
-LBB_main_4: # cond_true44
- addps %xmm1, %xmm2
- subps %xmm3, %xmm2
- movaps (%ecx), %xmm4
- movaps %xmm2, %xmm1
- addps %xmm4, %xmm1
- addl $16, %ecx
- incl %edx
- cmpl $262144, %edx
- movaps %xmm3, %xmm2
- movaps %xmm4, %xmm3
- jne LBB_main_4 # cond_true44
-
-There are two problems. 1) No need to two loop induction variables. We can
-compare against 262144 * 16. 2) Known register coalescer issue. We should
-be able eliminate one of the movaps:
-
- addps %xmm2, %xmm1 <=== Commute!
- subps %xmm3, %xmm1
- movaps (%ecx), %xmm4
- movaps %xmm1, %xmm1 <=== Eliminate!
- addps %xmm4, %xmm1
- addl $16, %ecx
- incl %edx
- cmpl $262144, %edx
- movaps %xmm3, %xmm2
- movaps %xmm4, %xmm3
- jne LBB_main_4 # cond_true44
-
-//===---------------------------------------------------------------------===//
-
Consider:
__m128 test(float a) {
@@ -382,22 +246,6 @@ elements are fixed zeros.
//===---------------------------------------------------------------------===//
-__m128d test1( __m128d A, __m128d B) {
- return _mm_shuffle_pd(A, B, 0x3);
-}
-
-compiles to
-
-shufpd $3, %xmm1, %xmm0
-
-Perhaps it's better to use unpckhpd instead?
-
-unpckhpd %xmm1, %xmm0
-
-Don't know if unpckhpd is faster. But it is shorter.
-
-//===---------------------------------------------------------------------===//
-
This code generates ugly code, probably due to costs being off or something:
define void @test(float* %P, <4 x float>* %P2 ) {
@@ -549,6 +397,7 @@ entry:
%tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
ret i64 %tmp20
}
+declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly
This currently compiles to:
@@ -987,3 +836,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and
doing a shuffle from v[1] to v[0] then a float store.
//===---------------------------------------------------------------------===//
+
+On SSE4 machines, we compile this code:
+
+define <2 x float> @test2(<2 x float> %Q, <2 x float> %R,
+ <2 x float> *%P) nounwind {
+ %Z = fadd <2 x float> %Q, %R
+
+ store <2 x float> %Z, <2 x float> *%P
+ ret <2 x float> %Z
+}
+
+into:
+
+_test2: ## @test2
+## BB#0:
+ insertps $0, %xmm2, %xmm2
+ insertps $16, %xmm3, %xmm2
+ insertps $0, %xmm0, %xmm3
+ insertps $16, %xmm1, %xmm3
+ addps %xmm2, %xmm3
+ movq %xmm3, (%rdi)
+ movaps %xmm3, %xmm0
+ pshufd $1, %xmm3, %xmm1
+ ## kill: XMM1<def> XMM1<kill>
+ ret
+
+The insertps's of $0 are pointless complex copies.
+
+//===---------------------------------------------------------------------===//
+
+
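A rough source-level equivalent of the <2 x float> test above, using the clang/GCC vector extension (this snippet is not in the tree; it only shows the kind of C that produces such IR):

typedef float v2f __attribute__((vector_size(8)));

v2f test2(v2f Q, v2f R, v2f *P) {
  v2f Z = Q + R;   // fadd <2 x float>
  *P = Z;          // store <2 x float>
  return Z;
}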
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index e8f7c5d6dd22..78c4dc00ee72 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -1,27 +1,5 @@
//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//
-Implement different PIC models? Right now we only support Mac OS X with small
-PIC code model.
-
-//===---------------------------------------------------------------------===//
-
-For this:
-
-extern void xx(void);
-void bar(void) {
- xx();
-}
-
-gcc compiles to:
-
-.globl _bar
-_bar:
- jmp _xx
-
-We need to do the tailcall optimization as well.
-
-//===---------------------------------------------------------------------===//
-
AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
multiplication by a constant. How much of it applies to Intel's X86-64
implementation? There are definite trade-offs to consider: latency vs. register
@@ -96,123 +74,14 @@ gcc:
movq %rax, (%rdx)
ret
-//===---------------------------------------------------------------------===//
-
-Vararg function prologue can be further optimized. Currently all XMM registers
-are stored into register save area. Most of them can be eliminated since the
-upper bound of the number of XMM registers used are passed in %al. gcc produces
-something like the following:
-
- movzbl %al, %edx
- leaq 0(,%rdx,4), %rax
- leaq 4+L2(%rip), %rdx
- leaq 239(%rsp), %rax
- jmp *%rdx
- movaps %xmm7, -15(%rax)
- movaps %xmm6, -31(%rax)
- movaps %xmm5, -47(%rax)
- movaps %xmm4, -63(%rax)
- movaps %xmm3, -79(%rax)
- movaps %xmm2, -95(%rax)
- movaps %xmm1, -111(%rax)
- movaps %xmm0, -127(%rax)
-L2:
-
-It jumps over the movaps that do not need to be stored. Hard to see this being
-significant as it added 5 instruciton (including a indirect branch) to avoid
-executing 0 to 8 stores in the function prologue.
-
-Perhaps we can optimize for the common case where no XMM registers are used for
-parameter passing. i.e. is %al == 0 jump over all stores. Or in the case of a
-leaf function where we can determine that no XMM input parameter is need, avoid
-emitting the stores at all.
-
-//===---------------------------------------------------------------------===//
+And the codegen is even worse for the following
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
+ void fill1(char *s, int a)
+ {
+ __builtin_memset(s, a, 15);
+ }
-AMD64 has a complex calling convention for aggregate passing by value:
-
-1. If the size of an object is larger than two eightbytes, or in C++, is a non-
- POD structure or union type, or contains unaligned fields, it has class
- MEMORY.
-2. Both eightbytes get initialized to class NO_CLASS.
-3. Each field of an object is classified recursively so that always two fields
- are considered. The resulting class is calculated according to the classes
- of the fields in the eightbyte:
- (a) If both classes are equal, this is the resulting class.
- (b) If one of the classes is NO_CLASS, the resulting class is the other
- class.
- (c) If one of the classes is MEMORY, the result is the MEMORY class.
- (d) If one of the classes is INTEGER, the result is the INTEGER.
- (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
- class.
- (f) Otherwise class SSE is used.
-4. Then a post merger cleanup is done:
- (a) If one of the classes is MEMORY, the whole argument is passed in memory.
- (b) If SSEUP is not preceeded by SSE, it is converted to SSE.
-
-Currently llvm frontend does not handle this correctly.
-
-Problem 1:
- typedef struct { int i; double d; } QuadWordS;
-It is currently passed in two i64 integer registers. However, gcc compiled
-callee expects the second element 'd' to be passed in XMM0.
-
-Problem 2:
- typedef struct { int32_t i; float j; double d; } QuadWordS;
-The size of the first two fields == i64 so they will be combined and passed in
-a integer register RDI. The third field is still passed in XMM0.
-
-Problem 3:
- typedef struct { int64_t i; int8_t j; int64_t d; } S;
- void test(S s)
-The size of this aggregate is greater than two i64 so it should be passed in
-memory. Currently llvm breaks this down and passed it in three integer
-registers.
-
-Problem 4:
-Taking problem 3 one step ahead where a function expects a aggregate value
-in memory followed by more parameter(s) passed in register(s).
- void test(S s, int b)
-
-LLVM IR does not allow parameter passing by aggregates, therefore it must break
-the aggregates value (in problem 3 and 4) into a number of scalar values:
- void %test(long %s.i, byte %s.j, long %s.d);
-
-However, if the backend were to lower this code literally it would pass the 3
-values in integer registers. To force it be passed in memory, the frontend
-should change the function signiture to:
- void %test(long %undef1, long %undef2, long %undef3, long %undef4,
- long %undef5, long %undef6,
- long %s.i, byte %s.j, long %s.d);
-And the callee would look something like this:
- call void %test( undef, undef, undef, undef, undef, undef,
- %tmp.s.i, %tmp.s.j, %tmp.s.d );
-The first 6 undef parameters would exhaust the 6 integer registers used for
-parameter passing. The following three integer values would then be forced into
-memory.
-
-For problem 4, the parameter 'd' would be moved to the front of the parameter
-list so it will be passed in register:
- void %test(int %d,
- long %undef1, long %undef2, long %undef3, long %undef4,
- long %undef5, long %undef6,
- long %s.i, byte %s.j, long %s.d);
-
-//===---------------------------------------------------------------------===//
-
-Right now the asm printer assumes GlobalAddress are accessed via RIP relative
-addressing. Therefore, it is not possible to generate this:
- movabsq $__ZTV10polynomialIdE+16, %rax
-
-That is ok for now since we currently only support small model. So the above
-is selected as
- leaq __ZTV10polynomialIdE+16(%rip), %rax
-
-This is probably slightly slower but is much shorter than movabsq. However, if
-we were to support medium or larger code models, we need to use the movabs
-instruction. We should probably introduce something like AbsoluteAddress to
-distinguish it from GlobalAddress so the asm printer and JIT code emitter can
-do the right thing.
+For this version, we duplicate the computation of the constant to store.
//===---------------------------------------------------------------------===//
@@ -298,3 +167,107 @@ be able to recognize the zero extend. This could also presumably be implemented
if we have whole-function selectiondags.
//===---------------------------------------------------------------------===//
+
+Take the following C code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
+
+struct u1
+{
+ float x;
+ float y;
+};
+
+float foo(struct u1 u)
+{
+ return u.x + u.y;
+}
+
+Optimizes to the following IR:
+define float @foo(double %u.0) nounwind readnone {
+entry:
+ %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2]
+ %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1]
+ %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1]
+ %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1]
+ %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1]
+ %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1]
+ ret float %0
+}
+
+And current llvm-gcc/clang output:
+ movd %xmm0, %rax
+ movd %eax, %xmm1
+ shrq $32, %rax
+ movd %eax, %xmm0
+ addss %xmm1, %xmm0
+ ret
+
+We really shouldn't move the floats to RAX, only to immediately move them
+straight back to the XMM registers.
+
+There really isn't any good way to handle this purely in IR optimizers; it
+could possibly be handled by changing the output of the frontend, though. It
+would also be feasible to add an x86-specific DAGCombine to optimize the
+bitcast+trunc+(lshr+)bitcast combination.
+
+//===---------------------------------------------------------------------===//
+
+Take the following code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
+extern unsigned long table[];
+unsigned long foo(unsigned char *p) {
+ unsigned long tag = *p;
+ return table[tag >> 4] + table[tag & 0xf];
+}
+
+Current code generated:
+ movzbl (%rdi), %eax
+ movq %rax, %rcx
+ andq $240, %rcx
+ shrq %rcx
+ andq $15, %rax
+ movq table(,%rax,8), %rax
+ addq table(%rcx), %rax
+ ret
+
+Issues:
+1. First movq should be movl; saves a byte.
+2. Both andq's should be andl; saves another two bytes. I think this was
+ implemented at one point, but subsequently regressed.
+3. shrq should be shrl; saves another byte.
+4. The first andq can be completely eliminated by using a slightly more
+ expensive addressing mode.
+
+//===---------------------------------------------------------------------===//
+
+Consider the following (contrived testcase, but contains common factors):
+
+#include <stdarg.h>
+int test(int x, ...) {
+ int sum, i;
+ va_list l;
+ va_start(l, x);
+ for (i = 0; i < x; i++)
+ sum += va_arg(l, int);
+ va_end(l);
+ return sum;
+}
+
+Testcase given in C because fixing it will likely involve changing the IR
+generated for it. The primary issue with the result is that it doesn't do any
+of the optimizations which are possible if we know the address of a va_list
+in the current function is never taken:
+1. We shouldn't spill the XMM registers because we only call va_arg with "int".
+2. It would be nice if we could scalarrepl the va_list.
+3. Probably overkill, but it'd be cool if we could peel off the first five
+iterations of the loop.
+
+Other optimizations involving functions which use va_arg on floats which don't
+have the address of a va_list taken:
+1. Conversely to the above, we shouldn't spill general registers if we only
+ call va_arg on "double".
+2. If we know nothing more than 64 bits wide is read from the XMM registers,
+ we can change the spilling code to reduce the amount of stack used by half.
+
+//===---------------------------------------------------------------------===//
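To make the bitcast+trunc+lshr pattern in the struct u1 note above concrete, here is a scalar C++ rendering of what that IR computes (illustrative only; the function name is invented for the example):

#include <cstdint>
#include <cstring>

float foo_equivalent(double u) {
  uint64_t bits;
  std::memcpy(&bits, &u, sizeof bits);              // bitcast double to i64
  uint32_t lo = static_cast<uint32_t>(bits);        // trunc i64 to i32
  uint32_t hi = static_cast<uint32_t>(bits >> 32);  // lshr 32, then trunc
  float x, y;
  std::memcpy(&x, &lo, sizeof x);                   // bitcast i32 to float
  std::memcpy(&y, &hi, sizeof y);
  return x + y;                                     // fadd float
}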
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index d4545a6fcfd3..efc0cd82f23e 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1103,57 +1103,6 @@ be folded into: shl [mem], 1
//===---------------------------------------------------------------------===//
-This testcase misses a read/modify/write opportunity (from PR1425):
-
-void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
- int i;
- for(i=0; i<width; i++)
- b1[i] += (1*(b0[i] + b2[i])+0)>>0;
-}
-
-We compile it down to:
-
-LBB1_2: # bb
- movl (%esi,%edi,4), %ebx
- addl (%ecx,%edi,4), %ebx
- addl (%edx,%edi,4), %ebx
- movl %ebx, (%ecx,%edi,4)
- incl %edi
- cmpl %eax, %edi
- jne LBB1_2 # bb
-
-the inner loop should add to the memory location (%ecx,%edi,4), saving
-a mov. Something like:
-
- movl (%esi,%edi,4), %ebx
- addl (%edx,%edi,4), %ebx
- addl %ebx, (%ecx,%edi,4)
-
-Here is another interesting example:
-
-void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
- int i;
- for(i=0; i<width; i++)
- b1[i] -= (1*(b0[i] + b2[i])+0)>>0;
-}
-
-We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]:
-
-LBB9_2: # bb
- movl (%ecx,%edi,4), %ebx
- subl (%esi,%edi,4), %ebx
- subl (%edx,%edi,4), %ebx
- movl %ebx, (%ecx,%edi,4)
- incl %edi
- cmpl %eax, %edi
- jne LBB9_2 # bb
-
-Additionally, LSR should rewrite the exit condition of these loops to use
-a stride-4 IV, would would allow all the scales in the loop to go away.
-This would result in smaller code and more efficient microops.
-
-//===---------------------------------------------------------------------===//
-
In SSE mode, we turn abs and neg into a load from the constant pool plus a xor
or and instruction, for example:
@@ -1301,15 +1250,8 @@ FirstOnet:
xorl %eax, %eax
ret
-There are a few possible improvements here:
-1. We should be able to eliminate the dead load into %ecx
-2. We could change the "movl 8(%esp), %eax" into
- "movzwl 10(%esp), %eax"; this lets us change the cmpl
- into a testl, which is shorter, and eliminate the shift.
-
-We could also in theory eliminate the branch by using a conditional
-for the address of the load, but that seems unlikely to be worthwhile
-in general.
+We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this
+lets us change the cmpl into a testl, which is shorter, and eliminate the shift.
//===---------------------------------------------------------------------===//
@@ -1331,22 +1273,23 @@ bb7: ; preds = %entry
to:
-_foo:
+foo: # @foo
+# BB#0: # %entry
+ movl 4(%esp), %ecx
cmpb $0, 16(%esp)
- movl 12(%esp), %ecx
+ je .LBB0_2
+# BB#1: # %bb
movl 8(%esp), %eax
- movl 4(%esp), %edx
- je LBB1_2 # bb7
-LBB1_1: # bb
- addl %edx, %eax
+ addl %ecx, %eax
ret
-LBB1_2: # bb7
- movl %edx, %eax
- subl %ecx, %eax
+.LBB0_2: # %bb7
+ movl 12(%esp), %edx
+ movl %ecx, %eax
+ subl %edx, %eax
ret
-The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
-if it commuted the addl in LBB1_1.
+There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a
+couple more movls by putting 4(%esp) into %eax instead of %ecx.
//===---------------------------------------------------------------------===//
@@ -1396,8 +1339,7 @@ Also check why xmm7 is not used at all in the function.
//===---------------------------------------------------------------------===//
-Legalize loses track of the fact that bools are always zero extended when in
-memory. This causes us to compile abort_gzip (from 164.gzip) from:
+Take the following:
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
@@ -1416,16 +1358,15 @@ bb4.i: ; preds = %entry
}
declare void @exit(i32) noreturn nounwind
-into:
-
-_abort_gzip:
+This compiles into:
+_abort_gzip: ## @abort_gzip
+## BB#0: ## %entry
subl $12, %esp
movb _in_exit.4870.b, %al
- notb %al
- testb $1, %al
- jne LBB1_2 ## bb4.i
-LBB1_1: ## bb.i
- ...
+ cmpb $1, %al
+ jne LBB0_2
+
+We somehow miss folding the movb into the cmpb.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 22e89a57f63d..677781d3730e 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -35,6 +35,10 @@ class formatted_raw_ostream;
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
+/// createGlobalBaseRegPass - This pass initializes a global base
+/// register for PIC on x86-32.
+FunctionPass* createGlobalBaseRegPass();
+
/// createX86FloatingPointStackifierPass - This function returns a pass which
/// converts floating point register references and pseudo instructions into
/// floating point stack references and physical instructions.
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 151087f58ed0..2cf65c11f94a 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -23,13 +23,13 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
-namespace {
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
case X86::reloc_pcrel_1byte:
case FK_Data_1: return 0;
+ case X86::reloc_pcrel_2byte:
case FK_Data_2: return 1;
case X86::reloc_pcrel_4byte:
case X86::reloc_riprel_4byte:
@@ -39,6 +39,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
}
+namespace {
class X86AsmBackend : public TargetAsmBackend {
public:
X86AsmBackend(const Target &T)
@@ -60,6 +61,7 @@ public:
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
+} // end anonymous namespace
static unsigned getRelaxedOpcode(unsigned Op) {
switch (Op) {
@@ -75,7 +77,6 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case X86::JG_1: return X86::JG_4;
case X86::JLE_1: return X86::JLE_4;
case X86::JL_1: return X86::JL_4;
- case X86::TAILJMP_1:
case X86::JMP_1: return X86::JMP_4;
case X86::JNE_1: return X86::JNE_4;
case X86::JNO_1: return X86::JNO_4;
@@ -180,6 +181,7 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
/* *** */
+namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
ELFX86AsmBackend(const Target &T)
@@ -281,7 +283,7 @@ public:
}
};
-}
+} // end anonymous namespace
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a5774e1f241f..a6a1e4e573cf 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -42,7 +42,7 @@ def RetCC_X86Common : CallingConv<[
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>,
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
// Long double types are always returned in ST0 (even with SSE).
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[
// returned in RAX. This disagrees with ABI documentation but is bug
// compatible with gcc.
CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -155,7 +155,7 @@ def CC_X86_64_C : CallingConv<[
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
- CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfType<[v8i8, v4i16, v2i32],
CCIfSubtarget<"isTargetDarwin()",
CCIfSubtarget<"hasSSE2()",
CCPromoteToType<v2i64>>>>,
@@ -177,7 +177,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>>
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
// Calling convention used on Win64
@@ -195,7 +195,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
// The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+ CCIfType<[v8i8, v4i16, v2i32, v1i64],
CCBitConvertToType<i64>>,
// The first 4 integer arguments are passed in integer registers.
@@ -254,7 +254,7 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
// registers if the call is not a vararg call.
- CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32],
CCAssignToReg<[MM0, MM1, MM2]>>>,
// Integer/Float values get stored in stack slots that are 4 bytes in
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 8f026049cb84..f13669bd741d 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -138,7 +138,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
// MOVPC32r is basically a call plus a pop instruction.
if (Desc.getOpcode() == X86::MOVPC32r)
emitInstruction(*I, &II->get(X86::POP32r));
- NumEmitted++; // Keep track of the # of mi's emitted
+ ++NumEmitted; // Keep track of the # of mi's emitted
}
}
} while (MCE.finishFunction(MF));
@@ -730,9 +730,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp,
- getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)
+ getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
.getReg()));
- CurOp += X86AddrNumOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
X86II::getSizeOfImm(Desc->TSFlags));
@@ -750,13 +750,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
break;
case X86II::MRMSrcMem: {
- // FIXME: Maybe lea should have its own form?
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
+ int AddrOperands = X86::AddrNumOperands;
intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
X86II::getSizeOfImm(Desc->TSFlags) : 0;
@@ -810,14 +804,14 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m: {
- intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ?
- (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ?
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
PCAdj);
- CurOp += X86AddrNumOperands;
+ CurOp += X86::AddrNumOperands;
if (CurOp == NumOps)
break;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 1bc5eb75d752..cdde24a156d0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -23,7 +23,9 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,20 +54,7 @@ class X86FastISel : public FastISel {
bool X86ScalarSSEf32;
public:
- explicit X86FastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- )
- : FastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- ) {
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -96,6 +85,8 @@ private:
bool X86SelectStore(const Instruction *I);
+ bool X86SelectRet(const Instruction *I);
+
bool X86SelectCmp(const Instruction *I);
bool X86SelectZExt(const Instruction *I);
@@ -117,6 +108,7 @@ private:
bool X86SelectCall(const Instruction *I);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
+ CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
@@ -190,6 +182,20 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
return CC_X86_32_C;
}
+/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
+/// convention.
+CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
+ bool isTailCall) {
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return RetCC_X86_Win64_C;
+ else
+ return RetCC_X86_64_C;
+ }
+
+ return RetCC_X86_32_C;
+}
+
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
@@ -242,7 +248,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), ResultReg), AM);
return true;
}
@@ -261,7 +268,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
case MVT::i1: {
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(MBB, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
Val = AndResult;
}
@@ -278,7 +285,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
break;
}
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM).addReg(Val);
return true;
}
@@ -306,7 +314,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}
if (Opc) {
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM)
.addImm(Signed ? (uint64_t) CI->getSExtValue() :
CI->getZExtValue());
return true;
@@ -342,6 +351,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+
Opcode = I->getOpcode();
U = I;
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
@@ -349,6 +364,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
U = C;
}
+ if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
switch (Opcode) {
default: break;
case Instruction::BitCast:
@@ -370,8 +391,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
case Instruction::Alloca: {
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
- DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
- if (SI != StaticAllocaMap.end()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = SI->second;
return true;
@@ -411,20 +433,33 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
Disp += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // Constant-offset addressing.
- Disp += CI->getSExtValue() * S;
- } else if (IndexReg == 0 &&
- (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
- (S == 1 || S == 2 || S == 4 || S == 8)) {
- // Scaled-index addressing.
- Scale = S;
- IndexReg = getRegForGEPIndex(Op).first;
- if (IndexReg == 0)
- return false;
- } else
- // Unsupported.
- goto unsupported_gep;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ } else
+ // Unsupported.
+ goto unsupported_gep;
+ } while (!Worklist.empty());
}
}
// Check for displacement overflow.
@@ -473,7 +508,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// If this reference is relative to the pic base, set it now.
if (isGlobalRelativeToPICBase(GVFlags)) {
// FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
}
// Unless the ABI requires an extra load, return a direct reference to
@@ -504,6 +539,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
StubAM.GV = GV;
StubAM.GVOpFlags = GVFlags;
+ // Prepare for inserting code in the local-value area.
+ MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
@@ -516,8 +554,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
}
LoadReg = createResultReg(RC);
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
-
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
@@ -642,6 +685,93 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
return X86FastEmitStore(VT, I->getOperand(0), AM);
}
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
+ ->getBytesToPopOnReturn() != 0)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (VA.getValVT() != TLI.getValueType(RV->getType()))
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ // Now emit the RET.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ return true;
+}
+
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
@@ -661,15 +791,15 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
return false;
}
-static unsigned X86ChooseCmpOpcode(EVT VT) {
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
- case MVT::f32: return X86::UCOMISSrr;
- case MVT::f64: return X86::UCOMISDrr;
+ case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
+ case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
}
}
@@ -706,18 +836,21 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
// CMPri, otherwise use CMPrr.
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
- BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
- .addImm(Op1C->getSExtValue());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
return true;
}
}
- unsigned CompareOpc = X86ChooseCmpOpcode(VT);
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
- BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
return true;
}
@@ -739,9 +872,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned EReg = createResultReg(&X86::GR8RegClass);
unsigned NPReg = createResultReg(&X86::GR8RegClass);
- BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
- BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
- BuildMI(MBB, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNPr), NPReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -752,9 +886,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned NEReg = createResultReg(&X86::GR8RegClass);
unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
- BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
- BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::OR8rr), ResultReg)
+ .addReg(PReg).addReg(NEReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -793,7 +931,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
- BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -819,8 +957,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
const BranchInst *BI = cast<BranchInst>(I);
- MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
- MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// Fold the common case of a conditional branch with a comparison.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
@@ -829,7 +967,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
- if (MBB->isLayoutSuccessor(TrueMBB)) {
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
@@ -878,16 +1016,18 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
- BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
if (Predicate == CmpInst::FCMP_UNE) {
// X86 requires a second branch to handle UNE (and OEQ,
// which is mapped to UNE above).
- BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
+ .addMBB(TrueMBB);
}
- FastEmitBranch(FalseMBB);
- MBB->addSuccessor(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
} else if (ExtractValueInst *EI =
@@ -910,10 +1050,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
const MachineInstr *SetMI = 0;
- unsigned Reg = lookUpRegForValue(EI);
+ unsigned Reg = getRegForValue(EI);
for (MachineBasicBlock::const_reverse_iterator
- RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
+ RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
+ RI != RE; ++RI) {
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
@@ -938,11 +1079,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
unsigned OpCode = SetMI->getOpcode();
if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
- BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
- X86::JO_4 : X86::JB_4))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB);
- MBB->addSuccessor(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
}
@@ -954,10 +1095,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
- BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
- BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
- FastEmitBranch(FalseMBB);
- MBB->addSuccessor(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(OpReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
@@ -1014,7 +1157,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
// Fold immediate in shl(x,3).
if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(OpImm),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
UpdateValueMap(I, ResultReg);
return true;
@@ -1022,17 +1165,19 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
// The shift instruction uses X86::CL. If we defined a super-register
- // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
- // we're doing here.
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
if (CReg != X86::CL)
- BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::sub_8bit);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1064,9 +1209,11 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
unsigned Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
- BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1080,7 +1227,9 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
- BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1097,7 +1246,9 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
- BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1132,7 +1283,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
+ .addReg(InputReg);
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
@@ -1153,14 +1305,18 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
switch (CI->getIntrinsicID()) {
default: break;
case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
// Cheat a little. We know that the registers for "add" and "seto" are
// allocated sequentially. However, we only keep track of the register
// for "add" in the value map. Use extractvalue's index to get the
// correct register for "seto".
- UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
+ unsigned OpReg = getRegForValue(Agg);
+ if (OpReg == 0)
+ return false;
+ UpdateValueMap(I, OpReg + *EI->idx_begin());
return true;
}
+ }
}
return false;
@@ -1174,8 +1330,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
- const Value *Op1 = I.getOperand(1); // The guard's value.
- const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
// Grab the frame index.
X86AddressMode AM;
@@ -1186,7 +1342,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
assert(CI && "Non-constant type in Intrinsic::objectsize?");
@@ -1204,8 +1360,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(OpC), ResultReg).
- addImm(CI->getZExtValue() == 0 ? -1ULL : 0);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
+ addImm(CI->isZero() ? -1ULL : 0);
UpdateValueMap(&I, ResultReg);
return true;
}
@@ -1218,12 +1374,12 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
- addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
- addMetadata(DI->getVariable());
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
+ addImm(0).addMetadata(DI->getVariable());
return true;
}
case Intrinsic::trap: {
- BuildMI(MBB, DL, TII.get(X86::TRAP));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
return true;
}
case Intrinsic::sadd_with_overflow:
@@ -1241,8 +1397,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
if (!isTypeLegal(RetTy, VT))
return false;
- const Value *Op1 = I.getOperand(1);
- const Value *Op2 = I.getOperand(2);
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
unsigned Reg1 = getRegForValue(Op1);
unsigned Reg2 = getRegForValue(Op2);
@@ -1259,7 +1415,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
+ .addReg(Reg1).addReg(Reg2);
unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
// If the add with overflow is an intra-block value then we just want to
@@ -1277,7 +1434,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
- BuildMI(MBB, DL, TII.get(Opc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
return true;
}
}
@@ -1285,7 +1442,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
bool X86FastISel::X86SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = I->getOperand(0);
+ const Value *Callee = CI->getCalledValue();
// Can't handle inline asm yet.
if (isa<InlineAsm>(Callee))
@@ -1314,6 +1471,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (FTy->isVarArg())
return false;
+ // Fast-isel doesn't know about callee-pop yet.
+ if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
+ return false;
+
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
EVT RetVT;
@@ -1387,6 +1548,12 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
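The Win64 shadow area reserved above is the 32-byte "home" region (four 8-byte slots, one per register parameter) that the Win64 calling convention keeps just above the return address; reserving it in CCState first pushes every memory-argument offset past it. Restated with that context, assuming the same CCState::AllocateStack(Size, Align) helper used here:

  if (Subtarget->isTargetWin64())
    CCInfo.AllocateStack(32, 8);   // 4 register parameters x 8 bytes of home space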
@@ -1394,7 +1561,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
- BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
+ .addImm(NumBytes);
// Process arguments: walk the register/memloc assignments, inserting
// copies / loads.
@@ -1449,11 +1617,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
if (VA.isRegLoc()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
- Arg, RC, RC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
@@ -1475,12 +1640,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (Subtarget->isPICStyleGOT()) {
- TargetRegisterClass *RC = X86::GR32RegisterClass;
- unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
- DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
}
// Issue the call.
@@ -1488,7 +1650,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
- MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addReg(CalleeOp);
} else {
// Direct call.
@@ -1517,7 +1680,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
- MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, OpFlags);
}
// Add an implicit use GOT pointer in EBX.
@@ -1530,9 +1694,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
- BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0);
// Now handle call return value (if any).
+ SmallVector<unsigned, 4> UsedRegs;
if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
@@ -1542,7 +1708,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
- TargetRegisterClass *SrcRC = DstRC;
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
@@ -1551,15 +1716,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
RVLocs[0].getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
CopyVT = MVT::f80;
- SrcRC = X86::RSTRegisterClass;
DstRC = X86::RFP80RegisterClass;
}
unsigned ResultReg = createResultReg(DstRC);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
if (CopyVT != RVLocs[0].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
@@ -1568,18 +1732,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize, false);
- addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc)), FI)
+ .addReg(ResultReg);
DstRC = ResVT == MVT::f32
? X86::FR32RegisterClass : X86::FR64RegisterClass;
Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
ResultReg = createResultReg(DstRC);
- addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), FI);
}
if (AndToI1) {
// Mask out all but lowest bit for some call which produces an i1.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(MBB, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
ResultReg = AndResult;
}
@@ -1587,6 +1754,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
UpdateValueMap(I, ResultReg);
}
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
return true;
}
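The return-value path above keeps the old x87-to-XMM strategy but expresses it with COPY and the new insertion point. A condensed sketch for an f32 value returned in st(0) but wanted in an XMM register, using only opcodes and register classes named in this hunk:

  // Copy st(0) out as f80, spill it with a truncating x87 store, then reload
  // the 4-byte result into an SSE register.
  unsigned ResultReg = createResultReg(X86::RFP80RegisterClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(X86::ST0);
  int FI = MFI.CreateStackObject(4, 4, false);            // 4 bytes for f32
  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(X86::ST_Fp80m32)), FI)
    .addReg(ResultReg);
  ResultReg = createResultReg(X86::FR32RegisterClass);
  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(X86::MOVSSrm), ResultReg), FI);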
@@ -1599,6 +1769,8 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectLoad(I);
case Instruction::Store:
return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
case Instruction::ICmp:
case Instruction::FCmp:
return X86SelectCmp(I);
@@ -1699,7 +1871,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
else
Opc = X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
- addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
return ResultReg;
}
return 0;
@@ -1717,10 +1890,10 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
unsigned char OpFlag = 0;
if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
OpFlag = X86II::MO_PIC_BASE_OFFSET;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
} else if (Subtarget->isPICStyleGOT()) {
OpFlag = X86II::MO_GOTOFF;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
} else if (Subtarget->isPICStyleRIPRel() &&
TM.getCodeModel() == CodeModel::Small) {
PICBase = X86::RIP;
@@ -1729,7 +1902,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// Create the load from the constant pool.
unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
unsigned ResultReg = createResultReg(RC);
- addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg),
MCPOffset, PICBase, OpFlag);
return ResultReg;
@@ -1743,7 +1917,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
// various places, but TargetMaterializeAlloca also needs a check
// in order to avoid recursion between getRegForValue,
// X86SelectAddress, and TargetMaterializeAlloca.
- if (!StaticAllocaMap.count(C))
+ if (!FuncInfo.StaticAllocaMap.count(C))
return 0;
X86AddressMode AM;
@@ -1752,24 +1926,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
- addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
return ResultReg;
}
namespace llvm {
- llvm::FastISel *X86::createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- ) {
- return new X86FastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- );
+ llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ return new X86FastISel(funcInfo);
}
}
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
index a8117d47bf66..96e0aaec580b 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -17,6 +17,7 @@ namespace X86 {
enum Fixups {
reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1
+ reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw
reloc_riprel_4byte, // 32-bit rip-relative
reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq
};
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 93460ef308cc..cee4ad70201a 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -133,7 +133,7 @@ namespace {
// Emit an fxch to update the runtime processor's version of the state.
BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
- NumFXCH++;
+ ++NumFXCH;
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
@@ -164,6 +164,8 @@ namespace {
void handleCompareFP(MachineBasicBlock::iterator &I);
void handleCondMovFP(MachineBasicBlock::iterator &I);
void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ bool translateCopy(MachineInstr*);
};
char FPS::ID = 0;
}
@@ -232,12 +234,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
- unsigned Flags = MI->getDesc().TSFlags;
+ uint64_t Flags = MI->getDesc().TSFlags;
unsigned FPInstClass = Flags & X86II::FPTypeMask;
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
-
+
+ if (MI->isCopy() && translateCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -628,7 +633,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
unsigned NumOps = MI->getDesc().getNumOperands();
- assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) &&
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
"Can only handle fst* & ftst instructions!");
// Is this the last use of the source register?
@@ -1001,15 +1006,17 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::FpSET_ST0_32:
case X86::FpSET_ST0_64:
case X86::FpSET_ST0_80: {
+ // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
+ // arguments that use an st constraint. We expect a sequence of
+ // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
unsigned Op0 = getFPReg(MI->getOperand(0));
- // FpSET_ST0_80 is generated by copyRegToReg for both function return
- // and inline assembly with the "st" constrain. In the latter case,
- // it is possible for ST(0) to be alive after this instruction.
if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Duplicate Op0
- duplicateToTop(0, 7 /*temp register*/, I);
+ // Duplicate Op0 into a temporary on the stack top.
+ // This actually assumes that FP7 is dead.
+ duplicateToTop(Op0, 7, I);
} else {
+ // Op0 is killed, so just swap it into position.
moveToTop(Op0, I);
}
--StackTop; // "Forget" we have something on the top of stack!
@@ -1017,17 +1024,29 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
case X86::FpSET_ST1_32:
case X86::FpSET_ST1_64:
- case X86::FpSET_ST1_80:
- // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
- if (StackTop == 1) {
- BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
- StackTop = 0;
- break;
+ case X86::FpSET_ST1_80: {
+ // Set up st(1) for inline asm. We are assuming that st(0) has already been
+ // set up by FpSET_ST0, and our StackTop is off by one because of it.
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ // Restore the actual StackTop from before Fp_SET_ST0.
+ // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
+ // are not enforcing the constraint.
+ ++StackTop;
+ unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
+ if (!MI->killsRegister(X86::FP0 + Op0)) {
+ // Assume FP6 is not live, use it as a scratch register.
+ duplicateToTop(Op0, 6, I);
+ moveToTop(RegOnTop, I);
+ } else if (getSTReg(Op0) != X86::ST1) {
+ // We have the wrong value at st(1). Shuffle! Untested!
+ moveToTop(getStackEntry(1), I);
+ moveToTop(Op0, I);
+ moveToTop(RegOnTop, I);
}
- assert(StackTop == 2 && "Stack should have two element on it to return!");
- --StackTop; // "Forget" we have something on the top of stack!
+ assert(StackTop >= 2 && "Too few live registers");
+ StackTop -= 2; // "Forget" both st(0) and st(1).
break;
+ }
case X86::MOV_Fp3232:
case X86::MOV_Fp3264:
case X86::MOV_Fp6432:
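The rewritten FpSET_ST1 case above depends on the sequence documented in the FpSET_ST0 case. An illustration of the bookkeeping for inline asm that constrains two operands to st and st(1) (register numbers invented):

  //   %FP3 = ...                       ; value destined for st(0)
  //   %FP5 = ...                       ; value destined for st(1)
  //   FpSET_ST0_80 %FP3<kill>          ; decrements StackTop
  //   FpSET_ST1_80 %FP5<kill>          ; first restores StackTop (++StackTop),
  //                                    ; gets Op0 into st(1) while keeping the
  //                                    ; st(0) value on top, then forgets both
  //                                    ; slots at once (StackTop -= 2)
  //   INLINEASM ... (uses st(0) and st(1))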
@@ -1041,32 +1060,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
unsigned SrcReg = getFPReg(MO1);
const MachineOperand &MO0 = MI->getOperand(0);
- // These can be created due to inline asm. Two address pass can introduce
- // copies from RFP registers to virtual registers.
- if (MO0.getReg() == X86::ST0 && SrcReg == 0) {
- assert(MO1.isKill());
- // Treat %ST0<def> = MOV_Fp8080 %FP0<kill>
- // like FpSET_ST0_80 %FP0<kill>, %ST0<imp-def>
- assert((StackTop == 1 || StackTop == 2)
- && "Stack should have one or two element on it to return!");
- --StackTop; // "Forget" we have something on the top of stack!
- break;
- } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) {
- assert(MO1.isKill());
- // Treat %ST1<def> = MOV_Fp8080 %FP1<kill>
- // like FpSET_ST1_80 %FP0<kill>, %ST1<imp-def>
- // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
- if (StackTop == 1) {
- BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
- StackTop = 0;
- break;
- }
- assert(StackTop == 2 && "Stack should have two element on it to return!");
- --StackTop; // "Forget" we have something on the top of stack!
- break;
- }
-
unsigned DestReg = getFPReg(MO0);
if (MI->killsRegister(X86::FP0+SrcReg)) {
// If the input operand is killed, we can just change the owner of the
@@ -1206,3 +1199,33 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
I = MBB->erase(I); // Remove the pseudo instruction
--I;
}
+
+// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
+bool FPS::translateCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ if (DstReg == X86::ST0) {
+ MI->setDesc(TII->get(X86::FpSET_ST0_80));
+ MI->RemoveOperand(0);
+ return true;
+ }
+ if (DstReg == X86::ST1) {
+ MI->setDesc(TII->get(X86::FpSET_ST1_80));
+ MI->RemoveOperand(0);
+ return true;
+ }
+ if (SrcReg == X86::ST0) {
+ MI->setDesc(TII->get(X86::FpGET_ST0_80));
+ return true;
+ }
+ if (SrcReg == X86::ST1) {
+ MI->setDesc(TII->get(X86::FpGET_ST1_80));
+ return true;
+ }
+ if (X86::RFP80RegClass.contains(DstReg, SrcReg)) {
+ MI->setDesc(TII->get(X86::MOV_Fp8080));
+ return true;
+ }
+ return false;
+}
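A before/after illustration of what translateCopy does to the COPYs produced for x87 values (virtual register numbers invented; operands are shown as the code leaves them):

  //   %ST0<def> = COPY %FP0<kill>   becomes   FpSET_ST0_80 %FP0<kill>
  //   %FP2<def> = COPY %ST1<kill>   becomes   %FP2<def> = FpGET_ST1_80 %ST1<kill>
  //   %FP4<def> = COPY %FP3<kill>   becomes   %FP4<def> = MOV_Fp8080 %FP3<kill>
  // Any other COPY returns false and is treated as an ordinary instruction.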
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 747683dcc412..2c98b96c510b 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -72,18 +72,15 @@ static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
/// stack code, and thus needs an FP_REG_KILL.
static bool ContainsFPStackCode(MachineBasicBlock *MBB,
const MachineRegisterInfo &MRI) {
- // Scan the block, looking for instructions that define fp stack vregs.
+ // Scan the block, looking for instructions that define or use fp stack vregs.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
- if (I->getNumOperands() == 0 || !I->getOperand(0).isReg())
- continue;
-
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
+ if (!I->getOperand(op).isReg())
continue;
-
- if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
- return true;
+ if (unsigned Reg = I->getOperand(op).getReg())
+ if (isFPStackVReg(Reg, MRI))
+ return true;
}
}
@@ -108,8 +105,8 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB,
bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// If we are emitting FP stack code, scan the basic block to determine if this
- // block defines any FP values. If so, put an FP_REG_KILL instruction before
- // the terminator of the block.
+ // block defines or uses any FP values. If so, put an FP_REG_KILL instruction
+ // before the terminator of the block.
// Note that FP stack instructions are used in all modes for long double,
// so we always need to do this check.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0f64383b0b72..72f2bc11d7cc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -137,21 +137,6 @@ namespace {
}
namespace {
- class X86ISelListener : public SelectionDAG::DAGUpdateListener {
- SmallSet<SDNode*, 4> Deletes;
- public:
- explicit X86ISelListener() {}
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
- Deletes.insert(N);
- }
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
- bool IsDeleted(SDNode *N) {
- return Deletes.count(N);
- }
- };
-
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
@@ -199,16 +184,17 @@ namespace {
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
- X86ISelListener &DeadNodes,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp);
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp);
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool SelectScalarSSELoad(SDNode *Root, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
@@ -239,7 +225,8 @@ namespace {
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
- Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
AM.SymbolFlags);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
@@ -386,14 +373,14 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
}
for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
Ops.push_back(OrigChain.getOperand(i));
- CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size());
- CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
Ops.push_back(Call.getOperand(i));
- CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
}
/// isCalleeLoad - Return true if call address is a load and it can be
@@ -515,7 +502,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
N->getOperand(0),
MemTmp, NULL, 0, MemVT,
false, false, 0);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
NULL, 0, MemVT, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
@@ -664,8 +651,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
- X86ISelListener DeadNodes;
- if (MatchAddressRecursively(N, AM, DeadNodes, 0))
+ if (MatchAddressRecursively(N, AM, 0))
return true;
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
@@ -713,7 +699,6 @@ static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
}
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
- X86ISelListener &DeadNodes,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
@@ -876,13 +861,13 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// other uses, since it avoids a two-address sub instruction, however
// it costs an additional mov if the index register has other uses.
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
- if (MatchAddressRecursively(N.getNode()->getOperand(0), AM,
- DeadNodes, Depth+1) ||
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- DeadNodes.IsDeleted(N.getNode())) {
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
@@ -893,7 +878,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
int Cost = 0;
- SDValue RHS = N.getNode()->getOperand(1);
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
// If the RHS involves a register with multiple uses, this
// transformation incurs an extra mov, due to the neg instruction
// clobbering its operand.
@@ -944,35 +929,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+ SDValue LHS = Handle.getValue().getNode()->getOperand(0);
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+
X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM,
- DeadNodes, Depth+1)) {
- if (DeadNodes.IsDeleted(N.getNode()))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return true;
- if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM,
- DeadNodes, Depth+1))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return DeadNodes.IsDeleted(N.getNode());
- }
+ if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
+ !MatchAddressRecursively(RHS, AM, Depth+1))
+ return false;
+ AM = Backup;
+ LHS = Handle.getValue().getNode()->getOperand(0);
+ RHS = Handle.getValue().getNode()->getOperand(1);
// Try again after commuting the operands.
+ if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
+ !MatchAddressRecursively(LHS, AM, Depth+1))
+ return false;
AM = Backup;
- if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM,
- DeadNodes, Depth+1)) {
- if (DeadNodes.IsDeleted(N.getNode()))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return true;
- if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM,
- DeadNodes, Depth+1))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return DeadNodes.IsDeleted(N.getNode());
- }
- AM = Backup;
+ LHS = Handle.getValue().getNode()->getOperand(0);
+ RHS = Handle.getValue().getNode()->getOperand(1);
// If we couldn't fold both operands into the address at the same time,
// see if we can just put each operand into a register and fold at least
@@ -980,8 +957,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (AM.BaseType == X86ISelAddressMode::RegBase &&
!AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base_Reg = N.getNode()->getOperand(0);
- AM.IndexReg = N.getNode()->getOperand(1);
+ AM.Base_Reg = LHS;
+ AM.IndexReg = RHS;
AM.Scale = 1;
return false;
}
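The HandleSDNode above is what replaces the deleted X86ISelListener: holding N in a handle gives it an artificial use, so if the recursive match CSEs or replaces N, the handle still points at the live node and the operands can be re-read from it. A stripped-down sketch of the pattern used in both the SUB and ADD cases:

  HandleSDNode Handle(N);                    // artificial use tracks N across RAUW
  X86ISelAddressMode Backup = AM;
  if (!MatchAddressRecursively(Handle.getValue().getNode()->getOperand(0), AM,
                               Depth+1) &&
      !MatchAddressRecursively(Handle.getValue().getNode()->getOperand(1), AM,
                               Depth+1))
    return false;                            // both operands folded into AM
  AM = Backup;                               // otherwise roll back and re-read
  SDValue LHS = Handle.getValue().getNode()->getOperand(0);
  SDValue RHS = Handle.getValue().getNode()->getOperand(1);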
@@ -996,7 +973,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
uint64_t Offset = CN->getSExtValue();
// Start with the LHS as an addr mode.
- if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) &&
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
@@ -1073,7 +1050,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
CurDAG->RepositionNode(N.getNode(), Shl.getNode());
Shl.getNode()->setNodeId(N.getNode()->getNodeId());
}
- CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes);
+ CurDAG->ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
return false;
@@ -1124,7 +1101,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
}
- CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes);
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
AM.Scale = 1 << ShiftCst;
AM.IndexReg = NewAND;
@@ -1230,7 +1207,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp) {
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
X86ISelAddressMode AM;
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
@@ -1284,7 +1262,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
if (Complexity <= 2)
return false;
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1292,10 +1269,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
- SDValue &Disp) {
+ SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
@@ -1309,7 +1286,6 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1672,6 +1648,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1682,24 +1678,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
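The same AH workaround now appears in both the multiply and the divide/remainder paths: AH cannot be encoded in an instruction that carries a REX prefix, so on x86-64 the high byte is recovered from AX instead. The emitted node sequence, condensed from the hunk above:

  // CopyFromReg AX, shift the high byte into the low byte, then take sub_8bit.
  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                          X86::AX, MVT::i16, InFlag);
  InFlag = Result.getValue(2);
  Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, Result,
                                          CurDAG->getTargetConstant(8, MVT::i8)),
                   0);
  ReplaceUses(SDValue(Node, 1),
              CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));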
@@ -1812,6 +1793,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1822,25 +1826,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b02c33d3f880..1a634744806e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -62,21 +62,19 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
- switch (TM.getSubtarget<X86Subtarget>().TargetType) {
- default: llvm_unreachable("unknown subtarget type");
- case X86Subtarget::isDarwin:
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return new X8664_MachoTargetObjectFile();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
+ if (is64Bit) return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
- case X86Subtarget::isELF:
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return new X8664_ELFTargetObjectFile(TM);
+ } else if (TM.getSubtarget<X86Subtarget>().isTargetELF()) {
+ if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
return new X8632_ELFTargetObjectFile(TM);
- case X86Subtarget::isMingw:
- case X86Subtarget::isCygwin:
- case X86Subtarget::isWindows:
+ } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
return new TargetLoweringObjectFileCOFF();
- }
+ }
+ llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
@@ -347,6 +345,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (!Subtarget->hasSSE2())
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
// Expand certain atomics
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
@@ -611,7 +615,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false);
+
addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
setOperationAction(ISD::ADD, MVT::v8i8, Legal);
@@ -657,14 +661,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
@@ -672,7 +673,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
@@ -691,7 +691,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
}
}
@@ -792,9 +791,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
EVT VT = SVT;
// Do not attempt to promote non-128-bit vectors
- if (!VT.is128BitVector()) {
+ if (!VT.is128BitVector())
continue;
- }
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
@@ -825,6 +823,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (Subtarget->hasSSE41()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -965,15 +974,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
+
+ // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
+ // handle type legalization for these operations here.
+ //
+ // FIXME: We really should do custom legalization for addition and
+ // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
+ // than generic legalization for 64-bit multiplication-with-overflow, though.
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ }
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
@@ -992,7 +1010,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::MEMBARRIER);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
@@ -1172,6 +1189,27 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
+
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
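For reference, the address spaces returned by getStackCookieLocation are the X86 backend's encodings of the segment registers (256 is %gs, 257 is %fs), matching the comments above. A sketch of how a caller would consume the hook (TLI stands for whichever TargetLowering instance exposes it; the actual cookie load is omitted):

  unsigned AddressSpace, Offset;
  if (TLI.getStackCookieLocation(AddressSpace, Offset)) {
    // 64-bit, CodeModel::Kernel  ->  AddressSpace 256 (%gs), Offset 0x28
    // 64-bit, other code models  ->  AddressSpace 257 (%fs), Offset 0x28
    // 32-bit Linux               ->  AddressSpace 256 (%gs), Offset 0x14
    // Emit a load of the stack cookie from addrspace(AddressSpace) + Offset.
  }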
@@ -1180,19 +1218,19 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
- return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
@@ -1220,7 +1258,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue ValToCopy = Outs[i].Val;
+ SDValue ValToCopy = OutVals[i];
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1308,17 +1346,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
report_fatal_error("SSE register return with SSE disabled");
}
+ SDValue Val;
+
// If this is a call to a function that returns an fp value on the floating
- // point stack, but where we prefer to use the value in xmm registers, copy
- // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::ST0 ||
- VA.getLocReg() == X86::ST1) &&
- isScalarFPTypeInSSEReg(VA.getValVT())) {
- CopyVT = MVT::f80;
- }
+ // point stack, we must guarantee that the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpGET_ST0 instructions
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ bool isST0 = VA.getLocReg() == X86::ST0;
+ unsigned Opc = 0;
+ if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
+ if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
+ if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+ Ops, 2), 1);
+ Val = Chain.getValue(0);
- SDValue Val;
- if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
// For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
@@ -1338,15 +1393,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Val = Chain.getValue(0);
}
InFlag = Chain.getValue(2);
-
- if (CopyVT != VA.getValVT()) {
- // Round the F80 the right size, which also moves to the appropriate xmm
- // register.
- Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1));
- }
-
InVals.push_back(Val);
}
@@ -1383,29 +1429,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
return Ins[0].Flags.isSRet();
}
-/// IsCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg,
- CallingConv::ID CallingConv) const {
- if (IsVarArg)
- return false;
-
- switch (CallingConv) {
- default:
- return false;
- case CallingConv::X86_StdCall:
- return !Subtarget->is64Bit();
- case CallingConv::X86_FastCall:
- return !Subtarget->is64Bit();
- case CallingConv::X86_ThisCall:
- return !Subtarget->is64Bit();
- case CallingConv::Fast:
- return GuaranteedTailCallOpt;
- case CallingConv::GHC:
- return GuaranteedTailCallOpt;
- }
-}
-
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
@@ -1483,11 +1506,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), isImmutable, false);
+ VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable, false);
+ VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0,
@@ -1615,8 +1638,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
- true, false));
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
@@ -1722,7 +1744,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (IsCalleePop(isVarArg, CallConv)) {
+ if (Subtarget->IsCalleePop(isVarArg, CallConv)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -1788,7 +1810,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -1802,6 +1824,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1814,7 +1837,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, Ins, DAG);
+ Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -1874,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
@@ -2013,12 +2036,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
@@ -2059,7 +2082,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
- bool WasGlobalOrExternal = false;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
@@ -2067,7 +2089,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- WasGlobalOrExternal = true;
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
@@ -2095,11 +2116,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OpFlags = X86II::MO_DARWIN_STUB;
}
- Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -2153,17 +2173,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(InFlag);
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (MF.getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
- *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns not tail calls. Consider a void
+ // function making a tail call to a function returning int.
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
}
@@ -2173,7 +2188,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (IsCalleePop(isVarArg, CallConv))
+ if (Subtarget->IsCalleePop(isVarArg, CallConv))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
@@ -2314,6 +2329,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
@@ -2332,8 +2348,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
}
- // Look for obvious safe cases to perform tail call optimization that does not
- // requite ABI changes. This is what gcc calls sibcall.
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
@@ -2427,8 +2443,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
((X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
@@ -2439,26 +2454,32 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
}
+
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
+ // RDI, R8, R9, R11.
+ if (!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
+ unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
+ unsigned NumInRegs = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ if (++NumInRegs == Limit)
+ return false;
+ }
+ }
+ }
}
return true;
}
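  // Example (invented for illustration): an indirect 32-bit tail call whose
  // calling convention already places three arguments in registers would leave
  // no free register (only EAX, ECX, EDX are usable once callee-saved registers
  // have been restored) for the call address itself, so ++NumInRegs reaching
  // the Limit of 3 rejects the sibcall above.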
FastISel *
-X86TargetLowering::createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- ) const {
- return X86::createFastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- );
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return X86::createFastISel(funcInfo);
}
@@ -2476,7 +2497,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- false, false);
+ false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -3175,7 +3196,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ cast<ConstantSDNode>(Elt)->isNullValue()) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}
@@ -4433,7 +4454,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
@@ -4447,7 +4468,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- EVT MaskEltVT = MaskVT.getVectorElementType();
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
@@ -5059,13 +5079,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- if (Op.getValueType() == MVT::v2f32)
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(0))));
-
- if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64)
+
+ if (Op.getValueType() == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
@@ -5230,10 +5246,10 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
Offset = 0;
} else {
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
if (Subtarget->isPICStyleRIPRel() &&
@@ -5278,7 +5294,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
@@ -5351,7 +5367,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
@@ -5366,33 +5383,78 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
- // TODO: implement the "local dynamic" model
- // TODO: implement the "initial exec"model for pic executables
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->resolveAliasedGlobal(false);
-
- TLSModel::Model model = getTLSModel(GV,
- getTargetMachine().getRelocationModel());
-
- switch (model) {
- case TLSModel::GeneralDynamic:
- case TLSModel::LocalDynamic: // not implemented
- if (Subtarget->is64Bit())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
- return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+ if (Subtarget->isTargetELF()) {
+ // TODO: implement the "local dynamic" model
+ // TODO: implement the "initial exec" model for pic executables
+
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+
+ TLSModel::Model model
+ = getTLSModel(GV, getTargetMachine().getRelocationModel());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ if (Subtarget->is64Bit())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+ Subtarget->is64Bit());
+ }
+ } else if (Subtarget->isTargetDarwin()) {
+ // Darwin only has one model of TLS. Lower to that.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ X86ISD::WrapperRIP : X86ISD::Wrapper;
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
+ if (PIC32)
+ OpFlag = X86II::MO_TLVP_PIC_BASE;
+ else
+ OpFlag = X86II::MO_TLVP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ getPointerTy(),
+ GA->getOffset(), OpFlag);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC32, the address is actually $g + Offset.
+ if (PIC32)
+ Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Offset);
+
+ // Lowering the machine ISD node will make sure everything is in the right
+ // location.
+ SDValue Args[] = { Offset };
+ SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
+
+ // TLSCALL will be codegen'ed as a call. Inform MFI that this function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
- Subtarget->is64Bit());
+ // And our return value (tls address) is in the standard call return value
+ // location.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
}
+
+ assert(false &&
+ "TLS not implemented for this target.");
llvm_unreachable("Unreachable");
return SDValue();
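The rewritten LowerGlobalTLSAddress above now branches on the target: ELF keeps the existing model-based lowerings, while Darwin always uses its single TLVP scheme and emits a TLSCALL whose result lands in EAX/RAX. A rough standalone sketch of that dispatch (plain C++; the enums and returned strings are illustrative, not LLVM types):

#include <string>

enum class TargetOS { ELF, Darwin, Other };
enum class TLSModel { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

std::string chooseTLSLowering(TargetOS os, bool is64Bit, bool isPIC,
                              TLSModel model) {
  if (os == TargetOS::ELF) {
    switch (model) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic: // local dynamic not implemented, falls back
      return is64Bit ? "general dynamic, 64-bit" : "general dynamic, 32-bit";
    case TLSModel::InitialExec:
    case TLSModel::LocalExec:
      return "exec model (@indntpoff / @ntpoff)";
    }
  }
  if (os == TargetOS::Darwin) {
    // One model only: wrap the symbol with MO_TLVP (plus the PIC base in
    // 32-bit PIC code) and emit a TLSCALL; the address comes back in EAX/RAX.
    bool pic32 = isPIC && !is64Bit;
    return pic32 ? "TLVP + PIC base, then TLSCALL" : "TLVP, then TLSCALL";
  }
  return "unsupported (hits the assert above)";
}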
@@ -5715,7 +5777,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
FudgePtr, PseudoSourceValue::getConstantPool(),
0, MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
@@ -5964,6 +6026,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
+ default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
@@ -5973,120 +6036,129 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
case X86::COND_O: case X86::COND_NO:
NeedOF = true;
break;
- default: break;
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
- if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
- unsigned Opcode = 0;
- unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
- case ISD::ADD:
- // Due to an isel shortcoming, be conservative if this add is
- // likely to be selected as part of a load-modify-store
- // instruction. When the root node in a match is a store, isel
- // doesn't know how to remap non-chain non-flag uses of other
- // nodes in the match, such as the ADD in this case. This leads
- // to the ADD being left around and reselected, with the result
- // being two adds in the output. Alas, even if none our users
- // are stores, that doesn't prove we're O.K. Ergo, if we have
- // any parents that aren't CopyToReg or SETCC, eschew INC/DEC.
- // A better fix seems to require climbing the DAG back to the
- // root, and it doesn't seem to be worth the effort.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
- goto default_case;
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none of our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
}
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::AND: {
- // If the primary and result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better.
- bool NonFlagUse = false;
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- unsigned UOpNo = UI.getOperandNo();
- if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
- // Look pass truncate.
- UOpNo = User->use_begin().getOperandNo();
- User = *User->use_begin();
- }
- if (User->getOpcode() != ISD::BRCOND &&
- User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
- NonFlagUse = true;
- break;
- }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
}
- if (!NonFlagUse)
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
break;
+ }
}
+
+ if (!NonFlagUse)
+ break;
+ }
// FALL THROUGH
- case ISD::SUB:
- case ISD::OR:
- case ISD::XOR:
- // Due to the ISEL shortcoming noted above, be conservative if this op is
- // likely to be selected as part of a load-modify-store instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() == ISD::STORE)
- goto default_case;
- // Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
- case ISD::SUB: Opcode = X86ISD::SUB; break;
- case ISD::OR: Opcode = X86ISD::OR; break;
- case ISD::XOR: Opcode = X86ISD::XOR; break;
- case ISD::AND: Opcode = X86ISD::AND; break;
- default: llvm_unreachable("unexpected operator!");
- }
- NumOperands = 2;
- break;
- case X86ISD::ADD:
- case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::OR:
- case X86ISD::XOR:
- case X86ISD::AND:
- return SDValue(Op.getNode(), 1);
- default:
- default_case:
- break;
- }
- if (Opcode != 0) {
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i != NumOperands; ++i)
- Ops.push_back(Op.getOperand(i));
- SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
- DAG.ReplaceAllUsesWith(Op, New);
- return SDValue(New.getNode(), 1);
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
}
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
}
- // Otherwise just emit a CMP with 0, which is the TEST pattern.
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
}
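After the restructuring above, EmitTest's opcode choice reads in a straight line: an add of +1 becomes INC, an add of -1 becomes DEC, other arithmetic keeps its EFLAGS-setting form, and anything else falls back to a CMP with 0 (the TEST pattern). A standalone sketch of just that decision (plain C++; the enum is illustrative, not the X86ISD enum):

#include <cstdint>

enum class FlagSettingOp { INC, DEC, ADD, TEST };

FlagSettingOp pickFlagSettingOp(bool isAdd, bool rhsIsConstant,
                                int64_t rhsValue) {
  if (!isAdd)
    return FlagSettingOp::TEST;   // fall back to CMP with 0
  if (rhsIsConstant && rhsValue == 1)
    return FlagSettingOp::INC;    // an add of one selects as INC
  if (rhsIsConstant && rhsValue == -1)
    return FlagSettingOp::DEC;    // an add of negative one selects as DEC
  return FlagSettingOp::ADD;      // regular EFLAGS-setting add
}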
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
@@ -6113,15 +6185,21 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
Op1 = Op1.getOperand(0);
SDValue LHS, RHS;
- if (Op1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
- if (And10C->getZExtValue() == 1) {
- LHS = Op0;
- RHS = Op1.getOperand(1);
- }
- } else if (Op0.getOpcode() == ISD::SHL) {
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
LHS = Op1;
RHS = Op0.getOperand(1);
}
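LowerToBT above now also handles a shift on the first operand (via the swap) and, when a truncate was looked through, verifies that only known-zero bits are being dropped. The identity it relies on is that testing (x & (1 << n)) != 0 is exactly testing bit n of x, which is what BT puts into the carry flag. A small standalone check of that identity:

#include <cassert>
#include <cstdint>

static bool maskAndTest(uint32_t x, unsigned n) { return (x & (1u << n)) != 0; }
static bool bitTest(uint32_t x, unsigned n)     { return ((x >> n) & 1u) != 0; }

int main() {
  for (unsigned n = 0; n < 32; ++n)
    for (uint32_t x : {0u, 1u, 0x80000000u, 0xdeadbeefu, 0xffffffffu})
      assert(maskAndTest(x, n) == bitTest(x, n)); // BT computes the same bit
  return 0;
}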
@@ -6172,7 +6250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op0.getOpcode() == ISD::AND &&
Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
if (NewSetCC.getNode())
@@ -6552,15 +6630,16 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
- SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
+ SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
- if (User.getOpcode() == ISD::BR) {
- SDValue FalseBB = User.getOperand(1);
- SDValue NewBR =
- DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
+ (void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -6632,7 +6711,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
- EVT IntPtr = getPointerTy();
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
@@ -6685,7 +6763,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
Store = DAG.getStore(Op.getOperand(0), dl,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
- FIN, SV, 0, false, false, 0);
+ FIN, SV, 4, false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
@@ -6693,7 +6771,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0,
+ Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
false, false, 0);
MemOps.push_back(Store);
@@ -6702,7 +6780,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0,
+ Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -6712,9 +6790,6 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
- SDValue Chain = Op.getOperand(0);
- SDValue SrcPtr = Op.getOperand(1);
- SDValue SrcSV = Op.getOperand(2);
report_fatal_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
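The LowerVASTART hunks above change the alias-analysis offsets of the fp_offset, overflow_arg_area and reg_save_area stores from 0 to their real byte offsets (4, 8, 16) in the System V x86-64 va_list, which is a one-element array of the struct noted in the comment. A standalone layout check for an LP64 target (field names follow the ABI, not this file):

#include <cstddef>
#include <cstdint>

struct VaListTag {
  uint32_t gp_offset;          // offset 0: next general-purpose save slot
  uint32_t fp_offset;          // offset 4: next XMM save slot
  void    *overflow_arg_area;  // offset 8: stack-passed arguments
  void    *reg_save_area;      // offset 16: register save area
};

static_assert(offsetof(VaListTag, gp_offset) == 0, "gp_offset");
static_assert(offsetof(VaListTag, fp_offset) == 4, "fp_offset");
static_assert(offsetof(VaListTag, overflow_arg_area) == 8, "overflow_arg_area");
static_assert(offsetof(VaListTag, reg_save_area) == 16, "reg_save_area");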
@@ -7733,6 +7808,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
@@ -7944,8 +8020,11 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
@@ -7955,17 +8034,17 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
newMBB->addSuccessor(newMBB);
// Insert instructions into newMBB based on incoming instruction
- assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
"unexpected number of operands");
DebugLoc dl = bInstr->getDebugLoc();
MachineOperand& destOper = bInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = bInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &bInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
@@ -8008,7 +8087,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
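The block structure built above (a newMBB that redoes the operation and loops on JNE after the cmpxchg) is the standard load / modify / compare-exchange retry loop. A standalone sketch of the same shape using std::atomic, shown here for an atomic OR:

#include <atomic>
#include <cstdint>

uint32_t atomicOr(std::atomic<uint32_t> &mem, uint32_t val) {
  uint32_t oldVal = mem.load();
  uint32_t newVal;
  do {
    newVal = oldVal | val;  // the bitwise op performed inside newMBB
    // compare_exchange_weak reloads oldVal on failure, like the JNE loop back
  } while (!mem.compare_exchange_weak(oldVal, newVal));
  return oldVal;            // the pseudo's result: the previous memory value
}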
@@ -8053,8 +8132,11 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
@@ -8066,12 +8148,12 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
DebugLoc dl = bInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
// There are 8 "real" operands plus 9 implicit def/uses, ignored here.
- assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 &&
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
"unexpected number of operands");
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
- for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
// We use some of the operands multiple times, so conservatively just
@@ -8081,7 +8163,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
}
// x86 address has 5 operands: base, index, scale, displacement, and segment.
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
@@ -8171,7 +8253,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
@@ -8205,8 +8287,11 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors of thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(mInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
@@ -8217,16 +8302,16 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
DebugLoc dl = mInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
- assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
"unexpected number of operands");
MachineOperand& destOper = mInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = mInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &mInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
@@ -8274,7 +8359,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now.
+ mInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
@@ -8284,7 +8369,6 @@ MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
- MachineFunction *F = BB->getParent();
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -8306,7 +8390,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
- F->DeleteMachineInstr(MI);
+ MI->eraseFromParent();
return BB;
}
@@ -8335,9 +8419,12 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
- // Set up the CFG.
- // Move any original successors of MBB to the end block.
- EndMBB->transferSuccessors(MBB);
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(XMMSaveMBB);
// The XMMSaveMBB will fall through to the end block.
@@ -8376,7 +8463,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
.addMemOperand(MMO);
}
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return EndMBB;
}
@@ -8405,24 +8492,39 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
- BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ const MachineFunction *MF = BB->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ BitVector ReservedRegs = TRI->getReservedRegs(*MF);
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS) continue;
+ copy0MBB->addLiveIn(Reg);
+ sinkMBB->addLiveIn(Reg);
+ }
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -8431,11 +8533,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
}
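EmitLoweredSelect above now splices the tail of the block into sinkMBB and marks EFLAGS live-in where needed, but the CFG it builds is still the classic select diamond: thisMBB branches on the condition, copy0MBB supplies the false value, and sinkMBB joins the two with a PHI. A plain-C++ sketch of the equivalent control flow:

int selectDiamond(bool cond, int trueValue, int falseValue) {
  int result;
  // thisMBB: conditional branch straight to sinkMBB when the condition holds
  if (cond) {
    result = trueValue;     // value reaching the PHI from thisMBB
  } else {
    // copy0MBB: computes the false value and falls through to sinkMBB
    result = falseValue;
  }
  // sinkMBB: result = phi [ falseValue, copy0MBB ], [ trueValue, thisMBB ]
  return result;
}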
@@ -8444,21 +8547,70 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- MachineFunction *F = BB->getParent();
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32))
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("_alloca")
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This is pretty easy. We're taking the value that we received from
+ // our load from the relocation, sticking it in either RDI (x86-64)
+ // or EAX and doing an indirect call. The return value will then
+ // be in the normal return register.
+ const X86InstrInfo *TII
+ = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ assert(MI->getOperand(3).isGlobal() && "This should be a global");
+
+ if (Subtarget->is64Bit()) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
+ } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(0)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(TII->getGlobalBaseReg(F))
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
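EmitLoweredTLSCall above materializes the address of the variable's TLS descriptor into RDI (or EAX) and calls through the descriptor's first field; the variable's address then comes back in the normal return register. A sketch of that shape in plain C++, where the struct layout models the dyld tlv_descriptor as an assumption for illustration:

struct TLVDescriptor {
  void *(*thunk)(TLVDescriptor *self); // the getter TLSCALL invokes indirectly
  unsigned long key;
  unsigned long offset;
};

void *tlsAddressOf(TLVDescriptor *desc) {
  // Roughly: movq _var@TLVP(%rip), %rdi ; callq *(%rdi) -- result in %rax
  return desc->thunk(desc);
}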
@@ -8469,6 +8621,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
default: assert(false && "Unexpected instr type to insert");
case X86::MINGW_ALLOCA:
return EmitLoweredMingwAlloca(MI, BB);
+ case X86::TLSCall_32:
+ case X86::TLSCall_64:
+ return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
case X86::CMOV_V1I64:
case X86::CMOV_FR32:
@@ -8499,23 +8654,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the high part to be round to zero...
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
@@ -8554,13 +8711,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
} else {
AM.Disp = Op.getImm();
}
- addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
- .addReg(MI->getOperand(X86AddrNumOperands).getReg());
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// String/text processing lowering.
@@ -9513,8 +9671,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
- if (SumC->getSExtValue() == Bits &&
- ShAmt1.getOperand(1) == ShAmt0)
+ SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
@@ -9710,58 +9870,6 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// On X86 and X86-64, atomic operations are lowered to locked instructions.
-// Locked instructions, in turn, have implicit fence semantics (all memory
-// operations are flushed before issuing the locked instruction, and the
-// are not buffered), so we can fold away the common pattern of
-// fence-atomic-fence.
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
- SDValue atomic = N->getOperand(0);
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- break;
- default:
- return SDValue();
- }
-
- SDValue fence = atomic.getOperand(0);
- if (fence.getOpcode() != ISD::MEMBARRIER)
- return SDValue();
-
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2),
- atomic.getOperand(3));
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2));
- default:
- return SDValue();
- }
-}
-
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
@@ -9809,7 +9917,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
- case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
}
@@ -9932,8 +10039,8 @@ static bool LowerToBSwap(CallInst *CI) {
// so don't worry about this.
// Verify this is a simple bswap.
- if (CI->getNumOperands() != 2 ||
- CI->getType() != CI->getOperand(1)->getType() ||
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
@@ -9946,7 +10053,7 @@ static bool LowerToBSwap(CallInst *CI) {
Module *M = CI->getParent()->getParent()->getParent();
Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
CI->replaceAllUsesWith(Op);
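LowerToBSwap above now checks the call's arguments with getNumArgOperands()/getArgOperand() before rewriting it into the llvm.bswap intrinsic. The operation itself just reverses the byte order; a standalone reference for the 32-bit case:

#include <cassert>
#include <cstdint>

uint32_t bswap32(uint32_t x) {
  return ((x & 0x000000ffu) << 24) |
         ((x & 0x0000ff00u) <<  8) |
         ((x & 0x00ff0000u) >>  8) |
         ((x & 0xff000000u) >> 24);
}

int main() {
  assert(bswap32(0x12345678u) == 0x78563412u);
  return 0;
}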
@@ -10079,7 +10186,6 @@ LowerXConstraint(EVT ConstraintVT) const {
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
@@ -10121,9 +10227,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'e': {
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
- C->getSExtValue())) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
break;
@@ -10136,9 +10241,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'Z': {
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
- C->getZExtValue())) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
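The 'e' and 'Z' constraint cases above ask whether a 64-bit constant is representable as a sign-extended or zero-extended 32-bit immediate, now via the static ConstantInt::isValueValidForType. The underlying range checks, as a standalone sketch:

#include <cstdint>

bool fitsSExt32(int64_t v) {  // 'e': 32-bit signed value
  return v >= INT32_MIN && v <= INT32_MAX;
}

bool fitsZExt32(int64_t v) {  // 'Z': 32-bit unsigned value
  return v >= 0 && v <= static_cast<int64_t>(UINT32_MAX);
}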
@@ -10155,6 +10259,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
break;
}
+ // In any sort of PIC mode, addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup, so they can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ return;
+
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = 0;
@@ -10190,11 +10300,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
getTargetMachine())))
return;
- if (hasMemory)
- Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
- else
- Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
- Result = Op;
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
break;
}
}
@@ -10203,8 +10310,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
- Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
std::vector<unsigned> X86TargetLowering::
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1ef1a7b018a9..2d28e5cc2ea7 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -196,6 +196,10 @@ namespace llvm {
// TLSADDR - Thread Local Storage.
TLSADDR,
+
+ // TLSCALL - Thread Local Storage. A call to an OS-provided thunk at
+ // the address from an earlier relocation.
+ TLSCALL,
// SegmentBaseAddress - The address segment:0
SegmentBaseAddress,
@@ -496,7 +500,6 @@ namespace llvm {
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -576,20 +579,17 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *
- createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &,
- std::vector<std::pair<MachineInstr*, unsigned> > &
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &
-#endif
- ) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// getStackCookieLocation - Return true if the target stores stack
+ /// protector cookies at a fixed offset in some non-standard address
+ /// space, and populates the address space and offset as
+ /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -643,6 +643,7 @@ namespace llvm {
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
@@ -725,6 +726,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -733,13 +735,13 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const;
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
@@ -794,6 +796,9 @@ namespace llvm {
MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
@@ -806,15 +811,7 @@ namespace llvm {
};
namespace X86 {
- FastISel *createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &,
- std::vector<std::pair<MachineInstr*, unsigned> > &
-#ifndef NDEBUG
- , SmallSet<const Instruction*, 8> &
-#endif
- );
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
}
}
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 97eb17c689d0..42d0e7f9778a 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -35,6 +35,14 @@ def i64i8imm : Operand<i64> {
let ParserMatchClass = ImmSExti64i8AsmOperand;
}
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
// Special i64mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved register are popped.
@@ -44,29 +52,16 @@ def i64mem_TC : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
-def lea64mem : Operand<i64> {
- let PrintMethod = "printlea64mem";
- let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
- let ParserMatchClass = X86NoSegMemAsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
- let PrintMethod = "printlea64_32mem";
- let AsmOperandLowerMethod = "lower_lea64_32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
- let ParserMatchClass = X86NoSegMemAsmOperand;
-}
-
//===----------------------------------------------------------------------===//
// Complex Pattern Definitions.
//
-def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
-def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
-
+
//===----------------------------------------------------------------------===//
// Pattern fragments.
//
@@ -289,11 +284,11 @@ def LEA64_32r : I<0x8D, MRMSrcMem,
[(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)]>;
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))]>, TB;
@@ -521,7 +516,7 @@ let Defs = [EFLAGS] in {
def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
"add{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in {
let isCommutable = 1 in
// Register-Register Addition
@@ -559,7 +554,7 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86add_flag GR64:$src1, (load addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Memory-Register Addition
def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -580,7 +575,7 @@ let Uses = [EFLAGS] in {
def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
"adc{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -606,7 +601,7 @@ def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}",
@@ -621,7 +616,7 @@ def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
addr:$dst)]>;
} // Uses = [EFLAGS]
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Register-Register Subtraction
def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -653,7 +648,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
"sub{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
"sub{q}\t{$src, %rax|%rax, $src}", []>;
@@ -677,7 +672,7 @@ def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
(implicit EFLAGS)]>;
let Uses = [EFLAGS] in {
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
@@ -702,7 +697,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
"sbb{q}\t{$src, %rax|%rax, $src}", []>;
@@ -736,7 +731,7 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
}
let Defs = [EFLAGS] in {
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
// Register-Register Signed Integer Multiplication
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
@@ -751,7 +746,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Surprisingly enough, these are not two address instructions!
@@ -803,7 +798,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// Unary instructions
let Defs = [EFLAGS], CodeSize = 2 in {
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src)),
(implicit EFLAGS)]>;
@@ -811,14 +806,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
@@ -826,7 +821,7 @@ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
(implicit EFLAGS)]>;
// In 64-bit mode, single byte INC and DEC cannot be encoded.
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
"inc{w}\t$dst",
@@ -844,38 +839,36 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
Requires<[In64BitMode]>;
-} // isConvertibleToThreeAddress
+} // Constraints = "$src = $dst", isConvertibleToThreeAddress
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
-let isTwoAddress = 0, CodeSize = 2 in {
- def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
- def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
- def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
- def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-}
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
} // Defs = [EFLAGS], CodeSize
let Defs = [EFLAGS] in {
// Shift instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src),
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (shl GR64:$src, CL))]>;
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
@@ -885,7 +878,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
// 'add reg,reg' is cheaper.
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", []>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
@@ -898,18 +891,18 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
[(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (srl GR64:$src, CL))]>;
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shr{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
[(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
@@ -922,11 +915,11 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
[(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (sra GR64:$src, CL))]>;
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"sar{q}\t{$src2, $dst|$dst, $src2}",
@@ -934,7 +927,7 @@ def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
[(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
@@ -949,7 +942,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
// Rotate instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
"rcl{q}\t{1, $dst|$dst, 1}", []>;
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
@@ -966,9 +959,8 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
"rcr{q}\t{%cl, $dst|$dst, CL}", []>;
}
-}
+} // Constraints = "$src = $dst"
-let isTwoAddress = 0 in {
def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
"rcl{q}\t{1, $dst|$dst, 1}", []>;
def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
@@ -984,13 +976,12 @@ def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, CL}", []>;
}
-}
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
@@ -998,7 +989,7 @@ def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
@@ -1011,11 +1002,11 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
[(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src),
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotr GR64:$src, CL))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
@@ -1023,7 +1014,7 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
[(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
@@ -1037,7 +1028,7 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
// Double shift instructions (generalizations of rotate)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1067,7 +1058,7 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(i8 imm:$src3)))]>,
TB;
} // isCommutable
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in {
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -1097,7 +1088,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
// Logical Instructions...
//
-let isTwoAddress = 1 , AddedComplexity = 15 in
+let Constraints = "$src = $dst" , AddedComplexity = 15 in
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src))]>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
@@ -1107,7 +1098,7 @@ let Defs = [EFLAGS] in {
def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
"and{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1134,7 +1125,7 @@ def AND64ri32 : RIi32<0x81, MRM4r,
"and{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def AND64mr : RI<0x21, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src),
@@ -1152,7 +1143,7 @@ def AND64mi32 : RIi32<0x81, MRM4m,
[(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1179,7 +1170,7 @@ def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
"or{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"or{q}\t{$src, $dst|$dst, $src}",
@@ -1197,7 +1188,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
"or{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1224,7 +1215,7 @@ def XOR64ri32 : RIi32<0x81, MRM6r,
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"xor{q}\t{$src, $dst|$dst, $src}",
@@ -1366,7 +1357,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // Defs = [EFLAGS]
// Conditional moves
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
let isCommutable = 1 in {
def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1530,7 +1521,7 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
"cmovno{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
X86_COND_NO, EFLAGS))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Use sbb to materialize carry flag into a GPR.
// FIXME: This are pseudo ops that should be replaced with Pat<> patterns.
@@ -1588,7 +1579,7 @@ def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
@@ -1601,7 +1592,7 @@ def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
[(set VR128:$dst,
(int_x86_sse2_cvtsi642sd VR128:$src1,
(loadi64 addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Signed i64 -> f32
def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
@@ -1611,7 +1602,7 @@ def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
@@ -1625,7 +1616,7 @@ let isTwoAddress = 1 in {
[(set VR128:$dst,
(int_x86_sse_cvtsi642ss VR128:$src1,
(loadi64 addr:$src2)))]>;
-}
+} // Constraints = "$src1 = $dst"
// f32 -> signed i64
def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
@@ -1691,6 +1682,7 @@ def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
// Thread Local Storage Instructions
//===----------------------------------------------------------------------===//
+// ELF TLS Support
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
@@ -1700,7 +1692,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
".byte\t0x66; "
"leaq\t$sym(%rip), %rdi; "
".word\t0x6666; "
@@ -1709,6 +1701,17 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
+// Darwin TLS Support
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX],
+ Uses = [RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
let AddedComplexity = 5, isCodeGenOnly = 1 in
def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movq\t%gs:$src, $dst",
@@ -1964,6 +1967,17 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
Requires<[In64BitMode]>;
+// TLS lowering needs a few special-case patterns here.
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to movq foo@tpoff(%rbx), %rax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
// Comparisons.
// TEST R,R is smaller than CMP R,0
@@ -2332,45 +2346,3 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-//===----------------------------------------------------------------------===//
-// X86-64 SSE4.1 Instructions
-//===----------------------------------------------------------------------===//
-
-/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
-multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR64:$dst,
- (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>, OpSize, REX_W;
-}
-
-defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
-
-let isTwoAddress = 1 in {
- multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
- OpSize, REX_W;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, OpSize, REX_W;
- }
-}
-
-defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 5a82a7b1979b..2a6a71dd91a3 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -64,19 +64,15 @@ struct X86AddressMode {
///
static inline const MachineInstrBuilder &
addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
- // Because memory references are always represented with four
- // values, this adds: Reg, [1, NoReg, 0] to the instruction.
- return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
+ // Because memory references are always represented with five
+ // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
}
-static inline const MachineInstrBuilder &
-addLeaOffset(const MachineInstrBuilder &MIB, int Offset) {
- return MIB.addImm(1).addReg(0).addImm(Offset);
-}
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, int Offset) {
- return addLeaOffset(MIB, Offset).addReg(0);
+ return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
}
/// addRegOffset - This function is used to add a memory reference of the form
@@ -89,25 +85,20 @@ addRegOffset(const MachineInstrBuilder &MIB,
return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
}
-static inline const MachineInstrBuilder &
-addLeaRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill, int Offset) {
- return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
-}
-
/// addRegReg - This function is used to add a memory reference of the form:
/// [Reg + Reg].
static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
unsigned Reg1, bool isKill1,
unsigned Reg2, bool isKill2) {
return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
- .addReg(Reg2, getKillRegState(isKill2)).addImm(0);
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0);
}
static inline const MachineInstrBuilder &
-addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) {
- assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
-
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
else if (AM.BaseType == X86AddressMode::FrameIndexBase)
@@ -116,15 +107,11 @@ addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) {
assert (0);
MIB.addImm(AM.Scale).addReg(AM.IndexReg);
if (AM.GV)
- return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
else
- return MIB.addImm(AM.Disp);
-}
-
-static inline const MachineInstrBuilder &
-addFullAddress(const MachineInstrBuilder &MIB,
- const X86AddressMode &AM) {
- return addLeaAddress(MIB, AM).addReg(0);
+ MIB.addImm(AM.Disp);
+
+ return MIB.addReg(0);
}
/// addFrameReference - This function is used to add a reference to the base of
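With the LEA-specific helpers folded away, every memory reference built through X86InstrBuilder.h now carries the full five-operand form: base, scale, index, displacement, segment. The following is a minimal sketch of a call site assuming the helpers above; the opcode, registers, and offset are illustrative only and not taken from this patch.

#include "X86.h"
#include "X86InstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Load 32 bits from [BaseReg + Offset] into DestReg. addRegOffset() appends
// the memory operands as: base, scale=1, index=NoReg, disp=Offset, segment=NoReg.
static void emitLoadBasePlusOffset(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   DebugLoc DL, const TargetInstrInfo &TII,
                                   unsigned DestReg, unsigned BaseReg,
                                   int Offset) {
  addRegOffset(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), DestReg),
               BaseReg, /*isKill=*/false, Offset);
}

addDirectMem and addFullAddress compose the same five operands for register-direct addressing and for a full X86AddressMode, respectively.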
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 0aae4a8653b0..da93de988d50 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -371,7 +371,7 @@ multiclass FPCMov<PatLeaf cc> {
Requires<[HasCMov]>;
}
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
@@ -380,7 +380,7 @@ defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
-}
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
@@ -680,19 +680,19 @@ def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
-def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
+def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
+def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
+def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
-def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c4522f3fd9e8..97578af499be 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -50,9 +50,10 @@ def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
def Imm16 : ImmType<3>;
-def Imm32 : ImmType<4>;
-def Imm32PCRel : ImmType<5>;
-def Imm64 : ImmType<6>;
+def Imm16PCRel : ImmType<4>;
+def Imm32 : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64 : ImmType<7>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@@ -101,6 +102,10 @@ class XS { bits<4> Prefix = 12; }
class T8 { bits<4> Prefix = 13; }
class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -128,6 +133,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
+ // to be encoded in an immediate field?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -141,6 +151,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{21-20} = SegOvrBits;
let TSFlags{23-22} = ExeDomain.Value;
let TSFlags{31-24} = Opcode;
+ let TSFlags{32} = hasVEXPrefix;
+ let TSFlags{33} = hasVEX_WPrefix;
+ let TSFlags{34} = hasVEX_4VPrefix;
+ let TSFlags{35} = hasVEX_i8ImmReg;
}
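Since the new AVX flags occupy TSFlags bits 32-35 (per the let assignments just above), a consumer such as the instruction encoder can recover them with plain shifts. This is a minimal sketch with illustrative helper names; the real emitter keeps these in named masks rather than free functions.

#include <cstdint>

// TSFlags bit positions follow the TableGen layout above.
static inline bool hasVEXPrefix(uint64_t TSFlags)    { return (TSFlags >> 32) & 1; }
static inline bool hasVEX_WPrefix(uint64_t TSFlags)  { return (TSFlags >> 33) & 1; }
static inline bool hasVEX_4VPrefix(uint64_t TSFlags) { return (TSFlags >> 34) & 1; }
static inline bool hasVEX_i8ImmReg(uint64_t TSFlags) { return (TSFlags >> 35) & 1; }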
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -174,6 +188,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
@@ -211,11 +232,56 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+}
+
// SSE1 Instruction Templates:
//
// SSI - SSE1 instructions with XS prefix.
// PSI - SSE1 instructions with TB prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with TB prefix in AVX form.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
@@ -229,6 +295,14 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+ Requires<[HasAVX]>;
// SSE2 Instruction Templates:
//
@@ -237,6 +311,8 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+// VSDI - SSE2 instructions with XD prefix in AVX form.
+// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
@@ -253,6 +329,14 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+ OpSize, Requires<[HasAVX]>;
// SSE3 Instruction Templates:
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6b9478dfa002..71c4e8bc147f 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -60,3 +60,339 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], MMX_SHUFFLE_get_shuf_imm>;
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// FIXME: move this to a more appropriate place after all AVX is done.
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+ (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+ (f64 (alignedload node:$ptr))>;
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+ (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+ (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+ (v2i64 (alignedload node:$ptr))>;
+
+// FIXME: move this to a more appropriate place after all AVX is done.
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// FIXME: move this to a more appropriate place after all AVX is done.
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
+// 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
+def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
+def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
+// a PALIGNR imm.
+def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePALIGNRImmediate(N));
+}]>;
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+def palign : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_palign_imm>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 34e12cade236..ce471eadd78b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -784,7 +784,9 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
+ case X86::MOV32rm_TC:
case X86::MOV64rm:
+ case X86::MOV64rm_TC:
case X86::LD_Fp64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
@@ -805,7 +807,9 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOV8mr:
case X86::MOV16mr:
case X86::MOV32mr:
+ case X86::MOV32mr_TC:
case X86::MOV64mr:
+ case X86::MOV64mr_TC:
case X86::ST_FpP64m:
case X86::MOVSSmr:
case X86::MOVSDmr:
@@ -863,7 +867,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
if (isFrameOperand(MI, 0, FrameIndex))
- return MI->getOperand(X86AddrNumOperands).getReg();
+ return MI->getOperand(X86::AddrNumOperands).getReg();
return 0;
}
@@ -1064,14 +1068,9 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
DebugLoc DL = Orig->getDebugLoc();
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
-
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// Re-materialize them as movri instructions to avoid side effects.
bool Clone = true;
@@ -1098,14 +1097,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
if (Clone) {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
MBB.insert(I, MI);
} else {
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
}
MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
}
/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1151,10 +1149,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// least on modern x86 machines).
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::sub_16bit);
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
@@ -1165,20 +1162,20 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill).addImm(0);
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
case X86::INC64_16r:
- addLeaRegOffset(MIB, leaInReg, true, 1);
+ addRegOffset(MIB, leaInReg, true, 1);
break;
case X86::DEC16r:
case X86::DEC64_16r:
- addLeaRegOffset(MIB, leaInReg, true, -1);
+ addRegOffset(MIB, leaInReg, true, -1);
break;
case X86::ADD16ri:
case X86::ADD16ri8:
- addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
break;
case X86::ADD16rr: {
unsigned Src2 = MI->getOperand(2).getReg();
@@ -1195,10 +1192,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// well be shifting and then extracting the lower 16-bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
- .addReg(leaInReg2)
- .addReg(Src2, getKillRegState(isKill2))
- .addImm(X86::sub_16bit);
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
}
if (LV && isKill2 && InsMI2)
@@ -1209,10 +1205,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::sub_16bit);
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
if (LV) {
// Update live variables
@@ -1283,7 +1278,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
.addReg(Src, getKillRegState(isKill))
- .addImm(0);
+ .addImm(0).addReg(0);
break;
}
case X86::SHL32ri: {
@@ -1297,7 +1292,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill)).addImm(0);
+ .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
break;
}
case X86::SHL16ri: {
@@ -1313,7 +1308,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
.addReg(Src, getKillRegState(isKill))
- .addImm(0);
+ .addImm(0).addReg(0);
break;
}
default: {
@@ -1331,7 +1326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, 1);
@@ -1353,7 +1348,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, -1);
@@ -1401,7 +1396,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
@@ -1410,7 +1405,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32ri8: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
@@ -1421,7 +1416,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
@@ -1845,9 +1840,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -1856,7 +1850,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
return 1;
}
@@ -1866,27 +1860,27 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
switch (CC) {
case X86::COND_NP_OR_E:
// Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
++Count;
break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
++Count;
break;
default: {
unsigned Opc = GetCondBranchFromCond(CC);
- BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
++Count;
}
}
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
++Count;
}
return Count;
@@ -1897,237 +1891,153 @@ static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
-bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- // Determine if DstRC and SrcRC have a common superclass in common.
- const TargetRegisterClass *CommonRC = DestRC;
- if (DestRC == SrcRC)
- /* Source and destination have the same register class. */;
- else if (CommonRC->hasSuperClass(SrcRC))
- CommonRC = SrcRC;
- else if (!DestRC->hasSubClass(SrcRC)) {
- // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
- // but we want to copy them as GR64. Similarly, for GR32_NOREX and
- // GR32_NOSP, copy as GR32.
- if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
- DestRC->hasSuperClass(&X86::GR64RegClass))
- CommonRC = &X86::GR64RegClass;
- else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
- DestRC->hasSuperClass(&X86::GR32RegClass))
- CommonRC = &X86::GR32RegClass;
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ unsigned Opc = 0;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
else
- CommonRC = 0;
- }
-
- if (CommonRC) {
- unsigned Opc;
- if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32RegClass ||
- CommonRC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16RegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8RegClass) {
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rr_NOREX;
- else
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rr_NOREX;
- else
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_NOREXRegClass ||
- CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_TCRegClass) {
- Opc = X86::MOV64rr_TC;
- } else if (CommonRC == &X86::GR32_TCRegClass) {
- Opc = X86::MOV32rr_TC;
- } else if (CommonRC == &X86::RFP32RegClass) {
- Opc = X86::MOV_Fp3232;
- } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
- Opc = X86::MOV_Fp6464;
- } else if (CommonRC == &X86::RFP80RegClass) {
- Opc = X86::MOV_Fp8080;
- } else if (CommonRC == &X86::FR32RegClass) {
- Opc = X86::FsMOVAPSrr;
- } else if (CommonRC == &X86::FR64RegClass) {
- Opc = X86::FsMOVAPDrr;
- } else if (CommonRC == &X86::VR128RegClass) {
- Opc = X86::MOVAPSrr;
- } else if (CommonRC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rr;
- } else {
- return false;
- }
- BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
- return true;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOVAPSrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
// Moving EFLAGS to / from another register requires a push and a pop.
- if (SrcRC == &X86::CCRRegClass) {
- if (SrcReg != X86::EFLAGS)
- return false;
- if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
- return true;
- } else if (DestRC == &X86::GR32RegClass ||
- DestRC == &X86::GR32_NOSPRegClass) {
+ return;
+ } else if (X86::GR32RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
- return true;
+ return;
}
- } else if (DestRC == &X86::CCRRegClass) {
- if (DestReg != X86::EFLAGS)
- return false;
- if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
+ }
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF64));
- return true;
- } else if (SrcRC == &X86::GR32RegClass ||
- DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
+ return;
+ } else if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF32));
- return true;
- }
- }
-
- // Moving from ST(0) turns into FpGET_ST0_32 etc.
- if (SrcRC == &X86::RSTRegClass) {
- // Copying from ST(0)/ST(1).
- if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
- // Can only copy from ST(0)/ST(1) right now
- return false;
- bool isST0 = SrcReg == X86::ST0;
- unsigned Opc;
- if (DestRC == &X86::RFP32RegClass)
- Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
- else if (DestRC == &X86::RFP64RegClass)
- Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
- else {
- if (DestRC != &X86::RFP80RegClass)
- return false;
- Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+ return;
}
- BuildMI(MBB, MI, DL, get(Opc), DestReg);
- return true;
}
- // Moving to ST(0) turns into FpSET_ST0_32 etc.
- if (DestRC == &X86::RSTRegClass) {
- // Copying to ST(0) / ST(1).
- if (DestReg != X86::ST0 && DestReg != X86::ST1)
- // Can only copy to TOS right now
- return false;
- bool isST0 = DestReg == X86::ST0;
- unsigned Opc;
- if (SrcRC == &X86::RFP32RegClass)
- Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
- else if (SrcRC == &X86::RFP64RegClass)
- Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
- else {
- if (SrcRC != &X86::RFP80RegClass)
- return false;
- Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
- }
- BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
- return true;
- }
-
- // Not yet supported!
- return false;
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
}
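A hedged sketch of a call into the new hook (registers and the kill flag are illustrative, not from this patch): a GR32-to-GR32 request goes down the symmetric-copy path and becomes a single MOV32rr, while EFLAGS copies take the push/pop path above.

// Copy EBX into EAX; no register classes are passed any more.
static void copyEBXIntoEAX(const X86InstrInfo &TII, MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL) {
  TII.copyPhysReg(MBB, MI, DL, X86::EAX, X86::EBX, /*KillSrc=*/true);
}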
-static unsigned getStoreRegOpcode(unsigned SrcReg,
- const TargetRegisterClass *RC,
- bool isStackAligned,
- TargetMachine &TM) {
- unsigned Opc = 0;
- if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16RegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8RegClass) {
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ switch (RC->getID()) {
+ default:
+ llvm_unreachable("Unknown regclass");
+ case X86::GR64RegClassID:
+ case X86::GR64_NOSPRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32RegClassID:
+ case X86::GR32_NOSPRegClassID:
+ case X86::GR32_ADRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16RegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8RegClassID:
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
- if (isHReg(SrcReg) &&
+ if (isHReg(Reg) &&
TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR64_ABCDRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32_ABCDRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16_ABCDRegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8_ABCD_LRegClassID:
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR8_ABCD_HRegClassID:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_NOREXRegClass ||
- RC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_TCRegClass) {
- Opc = X86::MOV64mr_TC;
- } else if (RC == &X86::GR32_TCRegClass) {
- Opc = X86::MOV32mr_TC;
- } else if (RC == &X86::RFP80RegClass) {
- Opc = X86::ST_FpP80m; // pops
- } else if (RC == &X86::RFP64RegClass) {
- Opc = X86::ST_Fp64m;
- } else if (RC == &X86::RFP32RegClass) {
- Opc = X86::ST_Fp32m;
- } else if (RC == &X86::FR32RegClass) {
- Opc = X86::MOVSSmr;
- } else if (RC == &X86::FR64RegClass) {
- Opc = X86::MOVSDmr;
- } else if (RC == &X86::VR128RegClass) {
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR64_NOREXRegClassID:
+ case X86::GR64_NOREX_NOSPRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32_NOREXRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16_NOREXRegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8_NOREXRegClassID:
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR64_TCRegClassID:
+ return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
+ case X86::GR32_TCRegClassID:
+ return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
+ case X86::RFP80RegClassID:
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case X86::RFP64RegClassID:
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ case X86::RFP32RegClassID:
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ case X86::FR32RegClassID:
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ case X86::FR64RegClassID:
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ case X86::VR128RegClassID:
// If stack is realigned we can use aligned stores.
- Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
- } else if (RC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64mr;
- } else {
- llvm_unreachable("Unknown regclass");
+ if (isStackAligned)
+ return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ else
+ return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case X86::VR64RegClassID:
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
}
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
- return Opc;
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
}
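(For orientation only: the storeRegToStackSlot body itself is unchanged by
this hunk and elided from the diff. A typical consumer of the two thin
wrappers looks roughly like the sketch below; addFrameReference,
getKillRegState and the stack-alignment query are assumed from the existing
LLVM helpers and may differ in detail.)

    const MachineFunction &MF = *MBB.getParent();
    bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
    DebugLoc DL = MBB.findDebugLoc(MI);
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
      .addReg(SrcReg, getKillRegState(isKill));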
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -2150,7 +2060,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
NewMIs.push_back(MIB);
}
-static unsigned getLoadRegOpcode(unsigned DestReg,
- const TargetRegisterClass *RC,
- bool isStackAligned,
- const TargetMachine &TM) {
- unsigned Opc = 0;
- if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16RegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8RegClass) {
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if (isHReg(DestReg) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rm_NOREX;
- else
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR8_ABCD_HRegClass) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rm_NOREX;
- else
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_NOREXRegClass ||
- RC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_TCRegClass) {
- Opc = X86::MOV64rm_TC;
- } else if (RC == &X86::GR32_TCRegClass) {
- Opc = X86::MOV32rm_TC;
- } else if (RC == &X86::RFP80RegClass) {
- Opc = X86::LD_Fp80m;
- } else if (RC == &X86::RFP64RegClass) {
- Opc = X86::LD_Fp64m;
- } else if (RC == &X86::RFP32RegClass) {
- Opc = X86::LD_Fp32m;
- } else if (RC == &X86::FR32RegClass) {
- Opc = X86::MOVSSrm;
- } else if (RC == &X86::FR64RegClass) {
- Opc = X86::MOVSDrm;
- } else if (RC == &X86::VR128RegClass) {
- // If stack is realigned we can use aligned loads.
- Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
- } else if (RC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rm;
- } else {
- llvm_unreachable("Unknown regclass");
- }
-
- return Opc;
-}
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -2246,7 +2090,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2277,18 +2121,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
if (Reg == FPReg)
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
- if (RegClass != &X86::VR128RegClass && !isWin64) {
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
- &RI);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ &X86::VR128RegClass, &RI);
}
}
@@ -2315,11 +2158,11 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (Reg == FPReg)
// X86RegisterInfo::emitEpilogue will handle restoring of frame register.
continue;
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &X86::VR128RegClass && !isWin64) {
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &X86::VR128RegClass, &RI);
}
}
return true;
@@ -2492,7 +2335,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
// No fusion
- if (PrintFailedFusing)
+ if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
@@ -2610,7 +2453,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
} else if (Ops.size() != 1)
return NULL;
- SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
case X86::V_SET0PS:
case X86::V_SET0PD:
@@ -2632,7 +2475,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (TM.getSubtarget<X86Subtarget>().is64Bit())
PICBase = X86::RIP;
else
- // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
// This doesn't work for several reasons.
// 1. GlobalBaseReg may have been spilled.
// 2. It may not be live at MI.
@@ -2664,7 +2507,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
default: {
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
- for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
break;
}
@@ -2727,7 +2570,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (I != OpcodeTablePtr->end())
return true;
}
- return false;
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -2751,13 +2594,20 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetInstrDesc &TID = get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
const TargetRegisterClass *RC = TOI.getRegClass(&RI);
- SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
SmallVector<MachineOperand,2> BeforeOps;
SmallVector<MachineOperand,2> AfterOps;
SmallVector<MachineOperand,4> ImpOps;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
- if (i >= Index && i < Index + X86AddrNumOperands)
+ if (i >= Index && i < Index + X86::AddrNumOperands)
AddrOps.push_back(Op);
else if (Op.isReg() && Op.isImplicit())
ImpOps.push_back(Op);
@@ -2776,7 +2626,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
- for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
MachineOperand &MO = NewMIs[0]->getOperand(i);
if (MO.isReg())
MO.setIsKill(false);
@@ -2873,7 +2723,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned NumOps = N->getNumOperands();
for (unsigned i = 0; i != NumOps-1; ++i) {
SDValue Op = N->getOperand(i);
- if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
AddrOps.push_back(Op);
else if (i < Index-NumDefs)
BeforeOps.push_back(Op);
@@ -2892,7 +2742,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineInstr::mmo_iterator> MMOs =
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
- bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
@@ -2929,7 +2784,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
- bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -3065,16 +2925,16 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
EVT VT = Load1->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: {
+ default:
// XMM registers. In 64-bit mode we can be a bit more aggressive since we
// have 16 of them to play with.
if (TM.getSubtargetImpl()->is64Bit()) {
if (NumLoads >= 3)
return false;
- } else if (NumLoads)
+ } else if (NumLoads) {
return false;
+ }
break;
- }
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -3083,6 +2943,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
case MVT::f64:
if (NumLoads)
return false;
+ break;
}
return true;
@@ -3123,6 +2984,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
return true;
}
return false;
@@ -3194,7 +3057,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
i = isTwoAddr ? 1 : 0;
if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
REX |= 1 << 2;
@@ -3546,7 +3409,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::MRMDestMem: {
++FinalSize;
FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86AddrNumOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps) {
++CurOp;
FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3565,16 +3428,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
break;
case X86II::MRMSrcMem: {
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
-
++FinalSize;
FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
- CurOp += AddrOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps) {
++CurOp;
FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3628,7 +3484,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
++FinalSize;
FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86AddrNumOperands;
+ CurOp += X86::AddrNumOperands;
if (CurOp != NumOps) {
const MachineOperand &MO = MI.getOperand(CurOp++);
@@ -3694,6 +3550,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
"X86-64 PIC uses RIP relative addressing");
@@ -3703,30 +3561,10 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
if (GlobalBaseReg != 0)
return GlobalBaseReg;
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-
- const TargetInstrInfo *TII = TM.getInstrInfo();
- // Operand of MovePCtoStack is completely ignored by asm printer. It's
- // only used in JIT code emission as displacement to pc.
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
- // If we're using vanilla 'GOT' PIC style, we should use relative addressing
- // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
- GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
- // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
- .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
- X86II::MO_GOT_ABSOLUTE_ADDRESS);
- } else {
- GlobalBaseReg = PC;
- }
-
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
@@ -3784,3 +3622,65 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ else
+ PC = TII->getGlobalBaseReg(&MF);
+
+ // The operand of MovePCtoStack is completely ignored by the asm printer;
+ // it's only used in JIT code emission as a displacement to the pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
+ }
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
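(Presumably the new CGBR pass is scheduled from X86TargetMachine, outside
this file; a sketch of the expected wiring in addInstSelector, with PM,
OptLevel and Subtarget assumed:)

    PM.add(createX86ISelDag(*this, OptLevel));
    // Initialize the PIC global base register on x86-32 only.
    if (!Subtarget.is64Bit())
      PM.add(createGlobalBaseRegPass());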
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 62d7c74484d5..f762b5827075 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -24,6 +24,24 @@ namespace llvm {
class X86TargetMachine;
namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5-operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
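(A minimal decoding sketch, assuming MI is a MachineInstr and MemOp is the
index of the first of the five memory fields, e.g. as derived from
getMemoryOperandNo further down in this header:)

    unsigned BaseReg  = MI.getOperand(MemOp + X86::AddrBaseReg).getReg();
    int64_t  ScaleAmt = MI.getOperand(MemOp + X86::AddrScaleAmt).getImm();
    unsigned IndexReg = MI.getOperand(MemOp + X86::AddrIndexReg).getReg();
    const MachineOperand &Disp = MI.getOperand(MemOp + X86::AddrDisp);
    unsigned SegReg   = MI.getOperand(MemOp + X86::AddrSegmentReg).getReg();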
+
+
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -173,7 +191,19 @@ namespace X86II {
/// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
/// which is a PIC-base-relative reference to a hidden dyld lazy pointer
/// stub.
- MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
};
}
@@ -203,6 +233,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ case X86II::MO_TLVP: // ??? Pretty sure..
return true;
default:
return false;
@@ -347,9 +378,10 @@ namespace X86II {
Imm8 = 1 << ImmShift,
Imm8PCRel = 2 << ImmShift,
Imm16 = 3 << ImmShift,
- Imm32 = 4 << ImmShift,
- Imm32PCRel = 5 << ImmShift,
- Imm64 = 6 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
@@ -403,28 +435,47 @@ namespace X86II {
SSEDomainShift = 22,
OpcodeShift = 24,
- OpcodeMask = 0xFF << OpcodeShift
+ OpcodeMask = 0xFF << OpcodeShift,
+
+ //===------------------------------------------------------------------===//
+ // VEX - The opcode prefix used by AVX instructions
+ VEX = 1ULL << 32,
+
+ // VEX_W - Has opcode-specific functionality, but is used in the same
+ // way as REX_W is for regular SSE instructions.
+ VEX_W = 1ULL << 33,
+
+ // VEX_4V - Used to specify an additional AVX/SSE register. Several
+ // 2-address instructions in SSE are represented as 3-address ones in
+ // AVX, and the additional register is encoded in the VEX_VVVV prefix.
+ VEX_4V = 1ULL << 34,
+
+ // VEX_I8IMM - Specifies that the last register used in an AVX instruction
+ // must be encoded in the i8 immediate field. This usually happens in
+ // instructions with 4 operands.
+ VEX_I8IMM = 1ULL << 35
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
- static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) {
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
- static inline bool hasImm(unsigned TSFlags) {
+ static inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
- static inline unsigned getSizeOfImm(unsigned TSFlags) {
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: assert(0 && "Unknown immediate size");
case X86II::Imm8:
case X86II::Imm8PCRel: return 1;
- case X86II::Imm16: return 2;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
case X86II::Imm32:
case X86II::Imm32PCRel: return 4;
case X86II::Imm64: return 8;
@@ -433,23 +484,77 @@ namespace X86II {
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(unsigned TSFlags) {
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
- case X86II::Imm8PCRel:
- case X86II::Imm32PCRel:
- return true;
- case X86II::Imm8:
- case X86II::Imm16:
- case X86II::Imm32:
- case X86II::Imm64:
- return false;
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
}
- }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ return -1;
+ }
+ }
}
-const int X86AddrNumOperands = 5;
-
inline static bool isScale(const MachineOperand &MO) {
return MO.isImm() &&
(MO.getImm() == 1 || MO.getImm() == 2 ||
@@ -555,7 +660,7 @@ public:
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -585,13 +690,12 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0d59c42dd164..1efef5a80b1b 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -72,6 +72,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -182,6 +184,9 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ []>;
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
@@ -197,13 +202,9 @@ def X86MemAsmOperand : AsmOperandClass {
let Name = "Mem";
let SuperClasses = [];
}
-def X86NoSegMemAsmOperand : AsmOperandClass {
- let Name = "NoSegMem";
- let SuperClasses = [X86MemAsmOperand];
-}
def X86AbsMemAsmOperand : AsmOperandClass {
let Name = "AbsMem";
- let SuperClasses = [X86NoSegMemAsmOperand];
+ let SuperClasses = [X86MemAsmOperand];
}
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
@@ -226,7 +227,7 @@ def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
-//def f256mem : X86MemOperand<"printf256mem">;
+def f256mem : X86MemOperand<"printf256mem">;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
@@ -245,15 +246,11 @@ def i32mem_TC : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
-def lea32mem : Operand<i32> {
- let PrintMethod = "printlea32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
- let ParserMatchClass = X86NoSegMemAsmOperand;
-}
let ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
def offset8 : Operand<i64>;
def offset16 : Operand<i64>;
@@ -283,26 +280,31 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
// (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
-// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti64i32";
}
-// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti16i8";
let SuperClasses = [ImmSExti64i32AsmOperand];
}
-// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
-// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti64i8";
- let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand];
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
}
// A couple of more descriptive operand definitions.
@@ -321,10 +323,10 @@ def i32i8imm : Operand<i32> {
// Define X86 specific addressing mode.
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
-def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
-def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
//===----------------------------------------------------------------------===//
@@ -704,6 +706,12 @@ let isCall = 1 in
"lcall{w}\t{*}$dst", []>, OpSize;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
"lcall{l}\t{*}$dst", []>;
+
+ // callw for 16-bit code, for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
}
// Constructing a stack frame.
@@ -737,18 +745,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
"jmp\t$dst # TAILCALL",
[]>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL",
- []>;
+ "", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL", []>;
-
- // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
- // marker on instructions, while still being able to relax.
- let isCodeGenOnly = 1 in {
- def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst # TAILCALL", []>;
- }
}
//===----------------------------------------------------------------------===//
@@ -815,7 +815,18 @@ def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
Requires<[In32BitMode]>;
}
-let isTwoAddress = 1 in // GR32 = bswap GR32
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad=1, neverHasSideEffects=1 in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -855,11 +866,11 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins lea32mem:$src),
+ (outs GR16:$dst), (ins i32mem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins lea32mem:$src),
+ (outs GR32:$dst), (ins i32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
[(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
@@ -1239,7 +1250,7 @@ def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
//===----------------------------------------------------------------------===//
// Two address Instructions.
//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Conditional moves
let Uses = [EFLAGS] in {
@@ -1640,7 +1651,7 @@ def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
// clobber EFLAGS, because if one of the operands is zero, the expansion
// could involve an xor.
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in {
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
@@ -1659,86 +1670,106 @@ def CMOV_GR16 : I<0, Pseudo,
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
"#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
"#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
"#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS]
+} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
} // Uses = [EFLAGS]
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src)),
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
(implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src)),
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
(implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src)),
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
(implicit EFLAGS)]>;
-let isTwoAddress = 0 in {
- def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+
+let Constraints = "" in {
+ def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
- def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+ def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
[(store (ineg (loadi16 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>, OpSize;
- def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
[(store (ineg (loadi32 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
-}
+} // Constraints = ""
} // Defs = [EFLAGS]
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src))]>;
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
}
-let isTwoAddress = 0 in {
- def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+let Constraints = "" in {
+ def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)]>;
- def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+ def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
[(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
- def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
[(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-}
+} // Constraints = ""
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let CodeSize = 2 in
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
Requires<[In32BitMode]>;
}
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
@@ -1750,23 +1781,24 @@ let isTwoAddress = 0, CodeSize = 2 in {
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>,
Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
let CodeSize = 2 in
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
Requires<[In32BitMode]>;
-}
+} // CodeSize = 2
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>;
@@ -1778,7 +1810,7 @@ let isTwoAddress = 0, CodeSize = 2 in {
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>,
Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
} // Defs = [EFLAGS]
// Logical operators...
@@ -1857,7 +1889,7 @@ def AND32ri8 : Ii8<0x83, MRM4r,
[(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def AND8mr : I<0x20, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
"and{b}\t{$src, $dst|$dst, $src}",
@@ -1909,7 +1941,7 @@ let isTwoAddress = 0 in {
def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
"and{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
@@ -1983,7 +2015,7 @@ def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
"or{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"or{b}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), GR8:$src), addr:$dst),
@@ -2025,7 +2057,7 @@ let isTwoAddress = 0 in {
"or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
"or{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
@@ -2102,7 +2134,7 @@ def XOR32ri8 : Ii8<0x83, MRM6r,
[(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def XOR8mr : I<0x30, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
"xor{b}\t{$src, $dst|$dst, $src}",
@@ -2153,26 +2185,27 @@ let isTwoAddress = 0 in {
"xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
"xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
} // Defs = [EFLAGS]
// Shift instructions
let Defs = [EFLAGS] in {
let Uses = [CL] in {
-def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src, CL))]>;
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -2193,7 +2226,7 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
} // isConvertibleToThreeAddress = 1
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, CL}",
@@ -2227,18 +2260,18 @@ let isTwoAddress = 0 in {
def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t$dst",
[(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src, CL))]>;
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
}
def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2262,7 +2295,7 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t$dst",
[(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -2296,18 +2329,18 @@ let isTwoAddress = 0 in {
def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t$dst",
[(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src, CL))]>;
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
}
def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2332,7 +2365,7 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t$dst",
[(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -2366,65 +2399,65 @@ let isTwoAddress = 0 in {
def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t$dst",
[(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
// Rotate instructions
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -2464,19 +2497,19 @@ def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
-}
+} // Constraints = ""
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
}
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2501,7 +2534,7 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t$dst",
[(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
@@ -2535,18 +2568,18 @@ let isTwoAddress = 0 in {
def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t$dst",
[(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
}
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2571,7 +2604,7 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t$dst",
[(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -2605,8 +2638,7 @@ let isTwoAddress = 0 in {
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
[(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
-
+} // Constraints = ""
// Double shift instructions (generalizations of rotate)
@@ -2662,7 +2694,7 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
TB, OpSize;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -2708,7 +2740,7 @@ let isTwoAddress = 0 in {
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)]>,
TB, OpSize;
-}
+} // Constraints = ""
} // Defs = [EFLAGS]
@@ -2794,7 +2826,7 @@ def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
(X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
// Memory-Register Addition
def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
@@ -2838,7 +2870,7 @@ let isTwoAddress = 0 in {
"add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
"add{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let Uses = [EFLAGS] in {
let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
@@ -2900,7 +2932,7 @@ def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
"adc{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"adc{b}\t{$src2, $dst|$dst, $src2}",
[(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -2935,7 +2967,7 @@ let isTwoAddress = 0 in {
"adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
"adc{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
} // Uses = [EFLAGS]
// Register-Register Subtraction
@@ -3007,7 +3039,7 @@ def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
[(set GR32:$dst, EFLAGS,
(X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
// Memory-Register Subtraction
def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
@@ -3052,7 +3084,7 @@ let isTwoAddress = 0 in {
"sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
"sub{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let Uses = [EFLAGS] in {
def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
@@ -3068,7 +3100,7 @@ def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
"sbb{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -3103,7 +3135,7 @@ let isTwoAddress = 0 in {
"sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
"sbb{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let isCodeGenOnly = 1 in {
def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
@@ -3811,6 +3843,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
// Thread Local Storage Instructions
//
+// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
@@ -3819,12 +3852,24 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"leal\t$sym, %eax; "
"call\t___tls_get_addr@PLT",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
let AddedComplexity = 5, isCodeGenOnly = 1 in
def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movl\t%gs:$src, $dst",
@@ -4783,14 +4828,14 @@ def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
// Patterns for nodes that do not produce flags, for instructions that do.
// Increment reg.
-def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>;
-def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>;
+def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
// Decrement reg.
-def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>;
+def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 0952fc8d7dcb..6cf7ac83620e 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -513,30 +513,20 @@ def : Pat<(store (v4i16 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
def : Pat<(store (v2i32 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2f32 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
def : Pat<(store (v1i64 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
// Bit convert.
def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
@@ -545,8 +535,6 @@ def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2f32 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
@@ -555,8 +543,6 @@ def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2f32 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5580ba74e64e..ab0005b31bb8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -15,322 +15,6 @@
//===----------------------------------------------------------------------===//
-// SSE specific DAG Nodes.
-//===----------------------------------------------------------------------===//
-
-def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
- SDTCisFP<0>, SDTCisInt<2> ]>;
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
- SDTCisFP<1>, SDTCisVT<3, i8>]>;
-
-def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
-def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
-def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
-def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
-def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86pshufb : SDNode<"X86ISD::PSHUFB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86pextrb : SDNode<"X86ISD::PEXTRB",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pextrw : SDNode<"X86ISD::PEXTRW",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pinsrb : SDNode<"X86ISD::PINSRB",
- SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86pinsrw : SDNode<"X86ISD::PINSRW",
- SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
-def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
-def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad]>;
-def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
-def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
-def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
-def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
-def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
-def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
-def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
-def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
-
-def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, v4f32>,
- SDTCisVT<2, v4f32>]>;
-def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
-
-//===----------------------------------------------------------------------===//
-// SSE Complex Patterns
-//===----------------------------------------------------------------------===//
-
-// These are 'extloads' from a scalar to the low element of a vector, zeroing
-// the top elements. These are used for the SSE 'ss' and 'sd' instruction
-// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
-
-def ssmem : Operand<v4f32> {
- let PrintMethod = "printf32mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-def sdmem : Operand<v2f64> {
- let PrintMethod = "printf64mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE pattern fragments
-//===----------------------------------------------------------------------===//
-
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-
-// Like 'store', but always requires vector alignment.
-def alignedstore : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-// Like 'load', but always requires vector alignment.
-def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
- (f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
- (f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
- (v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
- (v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
- (v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
- (v2i64 (alignedload node:$ptr))>;
-
-// Like 'load', but uses special alignment checks suitable for use in
-// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others. If the subtarget
-// allows unaligned accesses, match any load, though this may require
-// setting a feature bit in the processor (on startup, for example).
-// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
-def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
-def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
-def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
-def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
-def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-
-// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
-// 16-byte boundary.
-// FIXME: 8 byte alignment for mmx reads is not required
-def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
-def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
-
-// MOVNT Support
-// Like 'store', but requires the non-temporal bit to be set
-def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal();
- return false;
-}]>;
-
-def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() && !ST->isTruncatingStore() &&
- ST->getAddressingMode() == ISD::UNINDEXED &&
- ST->getAlignment() >= 16;
- return false;
-}]>;
-
-def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() &&
- ST->getAlignment() < 16;
- return false;
-}]>;
-
-def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
-def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
-def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
-def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
-def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
-def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
-
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
-
-def vzload_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzload node:$src)))>;
-
-
-def fp32imm0 : PatLeaf<(f32 fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-// BYTE_imm - Transform bit immediates into byte immediates.
-def BYTE_imm : SDNodeXForm<imm, [{
- // Transformation function: imm >> 3
- return getI32Imm(N->getZExtValue() >> 3);
-}]>;
-
-// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
-// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
-// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
-// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
-}]>;
-
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
-def movddup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def shufp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshufhw_imm>;
-
-def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshuflw_imm>;
-
-def palign : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_palign_imm>;
-
-//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//
@@ -368,857 +52,642 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
}
//===----------------------------------------------------------------------===//
-// SSE1 Instructions
+// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
-// Move Instructions. Register-to-register movss is not used for FR32
-// register copies because it's a partial register update; FsMOVAPSrr is
-// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movss for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}",
- [(set (v4f32 VR128:$dst),
- (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
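(Editorial illustration, not part of the patch: a minimal sketch of what one hypothetical instantiation of the sse12_fp_scalar multiclass above would expand to. The defm name "ADD", the "addss" OpcodeStr, and the appended XS suffix class are assumptions chosen for illustration; only the substitution of opc, OpcodeStr, OpNode, RC and x86memop follows from the multiclass itself.)

    // Hypothetical: defm ADD : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem>, XS;
    // With the default Is2Addr = 1, this produces roughly:
    def ADDrr : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                   "addss\t{$src2, $dst|$dst, $src2}",
                   [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>, XS;
    def ADDrm : SI<0x58, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                   "addss\t{$src2, $dst|$dst, $src2}",
                   [(set FR32:$dst, (fadd FR32:$src1, (load addr:$src2)))]>, XS;
    // The rr form is additionally marked isCommutable = 1. Instantiating with
    // Is2Addr = 0 (the AVX/VEX forms) yields the same defs with the three-operand
    // asm string "addss\t{$src2, $src1, $dst|$dst, $src1, $src2}".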
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat,
+ bit Is2Addr = 1> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, RC:$src2))]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, mem_cpat:$src2))]>;
+}
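(Editorial illustration, not part of the patch: the intrinsic record referenced by the patterns above is assembled purely from strings, "int_x86_sse" followed by SSEVer, "_", OpcodeStr and FPSizeStr. The argument values below are hypothetical, chosen only to show the concatenation.)

    // Illustration only: with hypothetical arguments SSEVer = "2", OpcodeStr = "add",
    // FPSizeStr = "_sd", the !nameconcat above resolves to
    //   "int_x86_sse" # "2" # "_" # "add" # "_sd"  ==  int_x86_sse2_add_sd
    // so the rr_Int pattern would become
    //   [(set VR128:$dst, (int_x86_sse2_add_sd VR128:$src1, VR128:$src2))]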
+
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ let mayLoad = 1 in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rr, d>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, RC:$src2))], d>;
+ def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+ SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+ [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory automatically zeroing upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> :
+ SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+let isAsmParserOnly = 1 in {
+ def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+ def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+
+ let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $dst|$dst, $src2}">, XS;
+ def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ let AddedComplexity = 20 in
+ def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+
+let AddedComplexity = 15 in {
// Extract the low 32-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// Extract the low 64-bit value from one vector and insert it into another.
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+}
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (loadf32 addr:$src))]>;
-
+let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
}
// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+let isAsmParserOnly = 1 in {
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
(EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-// Conversion instructions
-def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-
-// Match intrinsics which expect XMM operand(s).
-def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-
-def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvtss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
-def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvtss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_cvtss2si
- (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
-def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi
- (load addr:$src)))]>;
-def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
-def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi
- (load addr:$src)))]>;
-let Constraints = "$src1 = $dst" in {
- def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
- "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
- VR64:$src2))]>;
- def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
- (load addr:$src2)))]>;
-}
-
-// Aliases for intrinsics
-def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse_cvttss2si VR128:$src))]>;
-def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse_cvttss2si(load addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
- def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
- GR32:$src2))]>;
- def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
- (loadi32 addr:$src2)))]>;
-}
-
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
- def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
-
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
- def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
-}
-
-let Defs = [EFLAGS] in {
-def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>;
-def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
-def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}", []>;
-def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}", []>;
-
-} // Defs = [EFLAGS]
-
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
- def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss
- VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
- (load addr:$src), imm:$cc))]>;
-}
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
- VR128:$src2))]>;
-def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
- (load addr:$src2)))]>;
-
-def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
- VR128:$src2))]>;
-def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
- (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE1 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in
- // FIXME: Set encoding to pseudo!
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
-
-// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
-// disregarded.
+// Move Aligned/Unaligned floating point values
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
-// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
- def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
- def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
- def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
-}
-
-def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fand FR32:$src1,
- (memopfsf32 addr:$src2)))]>;
-def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86for FR32:$src1,
- (memopfsf32 addr:$src2)))]>;
-def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fxor FR32:$src1,
- (memopfsf32 addr:$src2)))]>;
-
-let neverHasSideEffects = 1 in {
-def FsANDNPSrr : PSI<0x55, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}", []>;
-let mayLoad = 1 in
-def FsANDNPSrm : PSI<0x55, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}", []>;
-}
-}
-
-/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements unmodified (therefore these cannot be commuted).
-///
-/// These three forms can each be reg+reg or reg+mem, so there are a total of
-/// six "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int,
- bit Commutable = 0> {
- // Scalar operation, reg+reg.
- def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f32mem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;
-
- // Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1,
- sse_load_f32:$src2))]>;
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>;
}
-}
-
-// Arithmetic instructions
-defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
-defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
-defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
-defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
-/// sse1_fp_binop_rm - Other SSE1 binops
-///
-/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
-/// instructions for a full-vector intrinsic form. Operations that map
-/// onto C operators don't use this form since they just use the plain
-/// vector form instead of having a separate vector intrinsic form.
-///
-/// This provides a total of eight "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F32Int,
- Intrinsic V4F32Int,
- bit Commutable = 0> {
-
- // Scalar operation, reg+reg.
- def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f32mem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1,
- sse_load_f32:$src2))]>;
-
- // Vector intrinsic operation, reg+reg.
- def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, reg+mem.
- def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>;
-}
+let isAsmParserOnly = 1 in {
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
}
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize;
-defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
- int_x86_sse_max_ss, int_x86_sse_max_ps>;
-defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
- int_x86_sse_min_ss, int_x86_sse_min_ps>;
-
-//===----------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+let isAsmParserOnly = 1 in {
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
-
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+}
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv4f32 addr:$src))]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
-// Intrinsic forms of MOVUPS load and store
+// Intrinsic forms of MOVUPS/D load and store
+let isAsmParserOnly = 1 in {
+ let canFoldAsLoad = 1, isReMaterializable = 1 in
+ def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
+ def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
+ def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+ def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+
def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-let Constraints = "$src1 = $dst" in {
- let AddedComplexity = 20 in {
- def MOVLPSrm : PSI<0x12, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (movlp VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
- def MOVHPSrm : PSI<0x16, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
+ PatFrag mov_frag, string base_opc,
+ string asm_opr> {
+ def PSrm : PI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(!strconcat(base_opc,"s"), asm_opr),
+ [(set RC:$dst,
+ (mov_frag RC:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+ SSEPackedSingle>, TB;
+
+ def PDrm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ !strconcat(!strconcat(base_opc,"d"), asm_opr),
+ [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))],
+ SSEPackedDouble>, TB, OpSize;
+}
-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+let isAsmParserOnly = 1 in {
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
+let isAsmParserOnly = 1 in {
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>,
+ VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>,
+ VEX;
+}
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
(unpckh (bc_v2f64 (v4f32 VR128:$src)),
(undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
-let Constraints = "$src1 = $dst" in {
-let AddedComplexity = 20 in {
-def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- "movlhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
-
-def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- "movhlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
-} // AddedComplexity
-} // Constraints = "$src1 = $dst"
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
let AddedComplexity = 20 in {
-def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
-def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
}
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
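// For reference (illustrative only, not part of this patch): each
// instantiation of sse12_cvt_s is expected to expand into a register-source
// and a memory-source instruction. E.g. the CVTTSS2SI defm below would
// produce roughly:
//   def CVTTSS2SIrr : SI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
//                        "cvttss2si\t{$src, $dst|$dst, $src}",
//                        [(set GR32:$dst, (fp_to_sint FR32:$src))]>, XS;
//   def CVTTSS2SIrm : SI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
//                        "cvttss2si\t{$src, $dst|$dst, $src}",
//                        [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>, XS;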
-// Arithmetic
-
-/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a
-/// scalar) and leaves the top elements undefined.
-///
-/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F32Int,
- Intrinsic V4F32Int,
- bit Commutable = 0> {
- // Scalar operation, reg.
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, mem.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
- Requires<[HasSSE1, OptForSize]>;
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
- // Vector operation, reg.
- def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
- let isCommutable = Commutable;
- }
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ asm, []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src), asm, []>;
+}
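// Note (editorial, assumed rationale): the sse12_vcvt_avx rr/rm defs above
// carry empty patterns ([]), so these AVX scalar converts are presumably
// reached only via assembly or the intrinsic aliases later in this file. The
// extra DstRC:$src1 input models the VEX_4V source register whose upper
// elements pass through to the destination.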
- // Vector operation, mem.
- def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+let isAsmParserOnly = 1 in {
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
+ VEX_4V;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
+ VEX_4V;
+}
- // Intrinsic operation, reg.
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+
+// Conversion Instructions Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
- // Intrinsic operation, mem.
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
- // Vector intrinsic operation, reg
- def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
- // Vector intrinsic operation, mem
- def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
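// The *_3addr multiclasses take an extra DstRC:$src1 input so one definition
// serves both encodings: the SSE defms below are wrapped in
// Constraints = "$src1 = $dst" to tie that operand to the destination, while
// the AVX defms (e.g. Int_VCVTSD2SS further down) encode it as a separate
// register via VEX_4V.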
-// Square root.
-defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
- int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+let isAsmParserOnly = 1 in {
+ defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
+ VEX;
+ defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX;
+}
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
- int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
-defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
- int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
-// Logical
let Constraints = "$src1 = $dst" in {
- let isCommutable = 1 in {
- def ANDPSrr : PSI<0x54, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64
- (and VR128:$src1, VR128:$src2)))]>;
- def ORPSrr : PSI<0x56, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64
- (or VR128:$src1, VR128:$src2)))]>;
- def XORPSrr : PSI<0x57, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64
- (xor VR128:$src1, VR128:$src2)))]>;
- }
-
- def ANDPSrm : PSI<0x54, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ORPSrm : PSI<0x56, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def XORPSrm : PSI<0x57, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ANDNPSrr : PSI<0x55, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))]>;
- def ANDNPSrm : PSI<0x55, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))]>;
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}">, XD;
}
+// Instructions below don't have an AVX form.
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
let Constraints = "$src1 = $dst" in {
- def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- (memop addr:$src), imm:$cc))]>;
-
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
- def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
+ defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
}
-}
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
- let isConvertibleToThreeAddress = 1 in // Convert to pshufd
- def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1,
- VR128:$src2, i8imm:$src3),
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v4f32 (shufp:$src3
- VR128:$src1, (memopv4f32 addr:$src2))))]>;
-
- let AddedComplexity = 10 in {
- def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpckhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpckhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1,
- (memopv4f32 addr:$src2))))]>;
-
- def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-// Mask creation
-def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
-def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
-
-// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
-// Non-temporal stores
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+/// SSE 1 Only
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
- TB, Requires<[HasSSE2]>;
-
-def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
- TB, Requires<[HasSSE2]>;
+// Aliases for intrinsics
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+ int_x86_sse_cvttss2si, f32mem, load,
+ "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
+defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+ int_x86_sse2_cvttsd2si, f128mem, load,
+                      "cvttsd2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
}
-
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
- TB, Requires<[HasSSE1]>;
-
-// MXCSR register
-def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
-def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+ XS;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+                      f128mem, load, "cvttsd2si\t{$src, $dst|$dst, $src}">,
+ XD;
+
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
-
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
-
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
-//===---------------------------------------------------------------------===//
-// SSE2 Instructions
-//===---------------------------------------------------------------------===//
-
-// Move Instructions. Register-to-register movsd is not used for FR64
-// register copies because it's a partial register update; FsMOVAPDrr is
-// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movsd for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}",
- [(set (v2f64 VR128:$dst),
- (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
-
-// Extract the low 64-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (loadf64 addr:$src))]>;
-
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
-// Store scalar value to memory.
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>;
-
-// Extract and store.
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+/// SSE 2 Only
-// Conversion instructions
-def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+// Convert scalar double to scalar single
+let isAsmParserOnly = 1 in {
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+}
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
@@ -1226,35 +695,28 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, OptForSize]>;
-def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}", []>;
-def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}", []>;
-
-// SSE2 instructions with XS prefix
+let isAsmParserOnly = 1 in
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load,
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ XS, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load,
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
+
+// Convert scalar single to scalar double
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, Requires<[HasAVX]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
+}
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -1264,394 +726,51 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>,
- Requires<[HasSSE2, OptForSpeed]>;
-
-// Match intrinsics which expect XMM operand(s).
-def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvtsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
-def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
- "cvtsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvtsd2si
- (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
-def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi
- (memop addr:$src)))]>;
-def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
-def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi
- (memop addr:$src)))]>;
-def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
-def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd
- (load addr:$src)))]>;
-
-// Aliases for intrinsics
-def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse2_cvttsd2si VR128:$src))]>;
-def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvttsd2si
- (load addr:$src)))]>;
-
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
- def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
- def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
-}
-
-let Defs = [EFLAGS] in {
-def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>;
-def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
- def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, f64mem:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
- (load addr:$src), imm:$cc))]>;
-}
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
- VR128:$src2))]>;
-def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
- (load addr:$src2)))]>;
-
-def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
- VR128:$src2))]>;
-def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
- (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE2 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
-// disregarded.
-let neverHasSideEffects = 1 in
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
-// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
-
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
- def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
- def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
- def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
-}
-
-def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fand FR64:$src1,
- (memopfsf64 addr:$src2)))]>;
-def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86for FR64:$src1,
- (memopfsf64 addr:$src2)))]>;
-def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fxor FR64:$src1,
- (memopfsf64 addr:$src2)))]>;
-
-let neverHasSideEffects = 1 in {
-def FsANDNPDrr : PDI<0x55, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}", []>;
-let mayLoad = 1 in
-def FsANDNPDrm : PDI<0x55, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}", []>;
-}
-}
-
-/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements unmodified (therefore these cannot be commuted).
-///
-/// These three forms can each be reg+reg or reg+mem, so there are a total of
-/// six "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int,
- bit Commutable = 0> {
- // Scalar operation, reg+reg.
- def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
-
- // Intrinsic operation, reg+mem.
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1,
- sse_load_f64:$src2))]>;
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
}
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
}
-// Arithmetic instructions
-defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
-defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
-defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
-defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
-
-/// sse2_fp_binop_rm - Other SSE2 binops
-///
-/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
-/// instructions for a full-vector intrinsic form. Operations that map
-/// onto C operators don't use this form since they just use the plain
-/// vector form instead of having a separate vector intrinsic form.
-///
-/// This provides a total of eight "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F64Int,
- Intrinsic V2F64Int,
- bit Commutable = 0> {
-
- // Scalar operation, reg+reg.
- def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, reg+mem.
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1,
- sse_load_f64:$src2))]>;
-
- // Vector intrinsic operation, reg+reg.
- def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
- // Vector intrinsic operation, reg+mem.
- def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1,
- (memopv2f64 addr:$src2)))]>;
-}
+// Convert doubleword to packed single/double fp
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, VEX, Requires<[HasAVX]>;
}
-
-defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
- int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
-defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
- int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
-
-//===---------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
-
-def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
-def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv2f64 addr:$src))]>;
-def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>;
-
-// Intrinsic forms of MOVUPD load and store
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
-def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-
-let Constraints = "$src1 = $dst" in {
- let AddedComplexity = 20 in {
- def MOVLPDrm : PDI<0x12, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movlpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movlp VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
- def MOVHPDrm : PDI<0x16, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movlhps VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>;
-
-// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -1662,7 +781,18 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
TB, Requires<[HasSSE2]>;
-// SSE2 instructions with XS prefix
+// FIXME: why is the non-intrinsic version described as SSE3?
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -1673,6 +803,29 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+// Convert packed single/double fp to doubleword
+let isAsmParserOnly = 1 in {
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>, VEX;
+}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -1680,12 +833,54 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>;
-// SSE2 packed instructions with XS prefix
+
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, VEX, Requires<[HasAVX]>;
+}
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword, with truncation
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, VEX, Requires<[HasAVX]>;
+}
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1697,17 +892,18 @@ def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memop addr:$src)))]>,
XS, Requires<[HasSSE2]>;
-// SSE2 packed instructions with XD prefix
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
- XD, Requires<[HasSSE2]>;
-
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
+}
def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1716,12 +912,31 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
-// SSE2 instructions without OpSize prefix
+// Convert packed single to packed double
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+ Requires<[HasAVX]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+ Requires<[HasAVX]>;
+}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ VEX, Requires<[HasAVX]>;
+}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1732,12 +947,29 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(load addr:$src)))]>,
TB, Requires<[HasSSE2]>;
+// Convert packed double to packed single
+let isAsmParserOnly = 1 in {
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+// FIXME: the memory form of this instruction should be described using
+// extra asm syntax
+}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+let isAsmParserOnly = 1 in {
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1746,269 +978,1039 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
-// Match intrinsics which expect XMM operand(s).
-// Aliases for intrinsics
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ string asm, string asm_alt> {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
+ let mayLoad = 1 in
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
+ }
+}
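// For reference (illustrative only): the *_alt forms above let the assembler
// accept an explicit immediate in place of the condition-code pseudo-mnemonics;
// e.g. in AT&T syntax "cmpss $7, %xmm1, %xmm0" is equivalent to
// "cmpordss %xmm1, %xmm0". Being isAsmParserOnly, they are not used by
// instruction selection.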
+
+let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+ defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XS, VEX_4V;
+ defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+}
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isAsmParserOnly = 1 in {
+ defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+ defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
+}
let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
- "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
- GR32:$src2))]>;
-def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
- "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
- (loadi32 addr:$src2)))]>;
-def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
- VR128:$src2))]>;
-def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
- (load addr:$src2)))]>;
-def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS,
- Requires<[HasSSE2]>;
-def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS,
- Requires<[HasSSE2]>;
+ defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))], d>;
+}
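// Each sse12_ord_cmp instantiation below yields a reg-reg and a reg-mem
// compare that only sets EFLAGS (no register result): the FR32/FR64 versions
// match the scalar X86cmp node, while the Int_* versions match
// X86ucomi/X86comi on whole v4f32/v2f64 vectors.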
+
+let Defs = [EFLAGS] in {
+ let isAsmParserOnly = 1 in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm, string asm_alt,
+ Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ }
+}
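+// Note: the SSECC operand carries the compare predicate, so the ${cc} asm
+// strings let the printer emit the cmpeqps/cmpltps style pseudo-mnemonics,
+// while the *_alt asm-parser-only defs also accept the raw immediate form.
+// Illustrative expansion for one defm (sketch):
+//   defm CMPPS : sse12_cmp_packed<...>;  // -> CMPPSrri, CMPPSrmi,
+//                                        //    CMPPSrri_alt, CMPPSrmi_alt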
+
+let isAsmParserOnly = 1 in {
+ defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+}
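+// Note: the VEX_4V forms encode their first source in the VEX.vvvv field, so
+// they are true three-operand instructions and do not need the "$src1 = $dst"
+// tie used by the legacy SSE forms below. They are marked isAsmParserOnly = 1
+// here, so (at this stage) they are presumably only visible to the assembler
+// and disassembler rather than being selected by codegen.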
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set VR128:$dst, (vt (shufp:$src3
+ VR128:$src1, (mem_frag addr:$src2))))], d>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
+ [(set VR128:$dst,
+ (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
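+// Note: SHUFPS is flagged isConvertibleToThreeAddress so that, when both
+// source operands end up being the same register, the two-address pass can
+// rewrite it as the untied PSHUFD form (hence the "cvt to pshufd" comment).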
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+ let isAsmParserOnly = 1 in {
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ } // Constraints = "$src1 = $dst"
+} // AddedComplexity
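+// Note: AddedComplexity = 10 simply raises the priority of the unpck*
+// patterns during instruction selection, so they win over more generic
+// vector_shuffle patterns that could also match.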
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], d>;
+}
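+// Note: the movmsk* defs below copy the sign bit of each packed element into
+// the low bits of a GR32, e.g. "movmskps %xmm0, %eax" leaves the four f32
+// sign bits in eax[3:0] and zeroes the rest (illustrative).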
+
+// Mask creation
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+let isAsmParserOnly = 1 in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+ // FIXME: merge with multiclass above when the intrinsics come.
+ def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+ def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
}
-// Arithmetic
+// Alias instructions to do FR32 or FR64 reg-to-reg copies using movaps or
+// movapd. Upper bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+}
-/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+// Alias instructions to load FR32 or FR64 from f128mem using movaps or
+// movapd. Upper bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let isAsmParserOnly = 1 in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let mayLoad = 0 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+}
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
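+// Note: X86fand/X86for/X86fxor are the scalar-FP-as-integer logical nodes the
+// DAG lowering produces for things like fabs, fneg and copysign on f32/f64,
+// so these 'Fs' aliases let those operations be selected without moving the
+// value out of an XMM register.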
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, int HasPat = 0,
+ list<list<dag>> Pattern = []> {
+ let isAsmParserOnly = 1, Pattern = []<dag> in {
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ !if(HasPat, Pattern[0], // rr
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+ VR128:$src2)))]),
+ !if(HasPat, Pattern[2], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]), 0>,
+ VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ !if(HasPat, Pattern[1], // rr
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64
+ VR128:$src2))))]),
+ !if(HasPat, Pattern[3], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]), 0>,
+ OpSize, VEX_4V;
+ }
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ !if(HasPat, Pattern[0], // rr
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+ VR128:$src2)))]),
+ !if(HasPat, Pattern[2], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ !if(HasPat, Pattern[1], // rr
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64
+ VR128:$src2))))]),
+ !if(HasPat, Pattern[3], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>,
+ TB, OpSize;
+ }
+}
+
+/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical op forms
+///
+let isAsmParserOnly = 1 in {
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
+
+ defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
+}
+}
+
+// AVX 256-bit packed logical op forms
+defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or">;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">;
+let isCommutable = 0 in
+ defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">;
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
+ // single r+r
+ [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)))],
+ // double r+r
+ [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ // single r+m
+ [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ (memopv2i64 addr:$src2))))],
+ // double r+m
+ [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (memopv2i64 addr:$src2)))]]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ let mayLoad = 0 in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let mayLoad = 0 in {
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+ SSEPackedSingle, Is2Addr>, TB;
+
+ defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+ SSEPackedDouble, Is2Addr>, TB, OpSize;
+}
+
+// Binary Arithmetic instructions
+let isAsmParserOnly = 1 in {
+ defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+ defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+
+ let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd>,
+ basic_sse12_fp_binop_s_int<0x58, "add">;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_s_int<0x59, "mul">;
+
+ let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub">;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_s_int<0x5E, "div">;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_s_int<0x5F, "max">,
+ basic_sse12_fp_binop_p_int<0x5F, "max">;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_s_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_int<0x5D, "min">;
+ }
+}
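+// Note: one defm here fans out across every multiclass it lists. As a rough
+// sketch, "defm ADD" above produces the scalar defs (ADDSSrr/rm, ADDSDrr/rm),
+// the packed defs (ADDPSrr/rm, ADDPDrr/rm) and the scalar intrinsic forms,
+// all tied two-address by the surrounding Constraints.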
+
+/// Unop Arithmetic
+/// SSE 1 & 2 unary FP operations come in both scalar and packed forms.
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F64Int,
- Intrinsic V2F64Int,
- bit Commutable = 0> {
- // Scalar operation, reg.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+  // For scalar unary operations, fold a load into the operation only in
+  // OptForSize mode. Folding saves an instruction, but it also removes the
+  // whole-register write the separate load would have done, so the folded
+  // form only partially updates the destination register (a partial
+  // register update hazard).
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
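+// For example (illustrative asm, see the OptForSize comment above): the
+// folded "sqrtss (%rax), %xmm0" writes only the low 32 bits of %xmm0 and so
+// depends on the old value of %xmm0, whereas
+// "movss (%rax), %xmm0; sqrtss %xmm0, %xmm0" starts a fresh dependency chain
+// because MOVSSrm zeroes the upper bits.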
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, XS, Requires<[HasAVX, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsic unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, mem.
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ [(set FR64:$dst, (OpNode FR64:$src))]>;
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
- // Vector operation, reg.
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, mem.
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+}
- // Intrinsic operation, reg.
- def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, mem.
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+}
- // Vector intrinsic operation, reg
+/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, mem
+ [(set VR128:$dst, (V2F64Int VR128:$src))]>;
def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // Square root.
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ VEX_4V;
+
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ VEX;
+
+ // Reciprocal approximations. Note that these typically require refinement
+ // in order to obtain suitable precision.
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
+
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ VEX_4V;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
+}
+
// Square root.
-defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
- int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
// There is no f64 version of the reciprocal approximation instructions.
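+// A single Newton-Raphson step is the usual refinement, e.g. (sketch):
+//   rcp:   x1 = x0 * (2 - d * x0)
+//   rsqrt: x1 = 0.5 * x0 * (3 - d * x0 * x0)
+// since rcpss/rsqrtss only guarantee roughly 12 bits of precision.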
-// Logical
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 1 in {
- def ANDPDrr : PDI<0x54, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- def ORPDrr : PDI<0x56, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (or (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- def XORPDrr : PDI<0x57, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (xor (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- }
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
- def ANDPDrm : PDI<0x54, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ORPDrm : PDI<0x56, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (or (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def XORPDrm : PDI<0x57, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (xor (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ANDNPDrr : PDI<0x55, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- def ANDNPDrm : PDI<0x55, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]>;
+let isAsmParserOnly = 1 in {
+ def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+ def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+ let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ }
}
-let Constraints = "$src1 = $dst" in {
- def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- (memop addr:$src), imm:$cc))]>;
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
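+// Note: alignednontemporalstore only matches stores carrying the
+// non-temporal hint, and the AddedComplexity = 400 above makes these
+// patterns win over the ordinary aligned-store patterns for such stores.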
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
- def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
}
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
- def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3
- VR128:$src1, (memopv2f64 addr:$src2))))]>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
- let AddedComplexity = 10 in {
- def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpckhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpckhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1,
- (memopv2f64 addr:$src2))))]>;
-
- def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpcklpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
+let isAsmParserOnly = 1 in {
+ def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+}
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
//===---------------------------------------------------------------------===//
-// SSE integer instructions
-let ExeDomain = SSEPackedInt in {
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let isAsmParserOnly = 1 in {
+ let neverHasSideEffects = 1 in
+ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
+ let canFoldAsLoad = 1, mayLoad = 1 in {
+ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
+ VEX;
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, VEX, Requires<[HasAVX]>;
+ }
+
+ let mayStore = 1 in {
+ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
+ def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, VEX, Requires<[HasAVX]>;
+ }
+}
-// Move Instructions
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, mayLoad = 1 in
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
-let mayStore = 1 in
-def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
-let canFoldAsLoad = 1, mayLoad = 1 in
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
XS, Requires<[HasSSE2]>;
-let mayStore = 1 in
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
XS, Requires<[HasSSE2]>;
+}
// Intrinsic forms of MOVDQU load and store
+let isAsmParserOnly = 1 in {
+let canFoldAsLoad = 1 in
+def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, VEX, Requires<[HasAVX]>;
+}
+
let canFoldAsLoad = 1 in
def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
@@ -2019,55 +2021,72 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
XS, Requires<[HasSSE2]>;
-let Constraints = "$src1 = $dst" in {
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- bit Commutable = 0> {
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64
- addr:$src2))))]>;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
}
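+// Note: the Is2Addr parameter in these multiclasses selects the asm string:
+// the tied two-operand SSE form when set (the legacy defs are wrapped in the
+// $src1 = $dst constraint), or the three-operand AVX syntax when cleared for
+// the VEX_4V variants.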
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr,
- Intrinsic IntId, Intrinsic IntId2> {
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+ (ins VR128:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
+ ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
(bitconvert (memopv2i64 addr:$src2)))))]>;
}
@@ -2077,64 +2096,177 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit Commutable = 0> {
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (memopv2i64 addr:$src2)))]>;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}
-} // Constraints = "$src1 = $dst"
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+ VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+ VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+ VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+ VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+ VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+ VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+ VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+ VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+ VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+ VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+ VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+ VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+ VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+ VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+ VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+ VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+ VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+ VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst" in {
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+// Intrinsic forms
defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-
defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+} // Constraints = "$src1 = $dst"
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+ VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+ VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+ VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+ VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+ VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+ VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+ VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+ VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def VPANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>, VEX_4V;
+ def VPANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>,
+ VEX_4V;
+}
+}
+let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
@@ -2154,17 +2286,34 @@ defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
-// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
- ExeDomain = SSEPackedInt in {
- def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "pslldq\t{$src2, $dst|$dst, $src2}", []>;
- def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "psrldq\t{$src2, $dst|$dst, $src2}", []>;
- // PSRADQri doesn't exist in SSE[1-3].
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>;
}
+} // Constraints = "$src1 = $dst"
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
@@ -2185,32 +2334,33 @@ let Predicates = [HasSSE2] in {
(v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}
-// Logical
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-
-let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+ 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+ 0>, VEX_4V;
}
-// SSE2 Integer comparison
-defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
-defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
-defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
-defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
-defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
-defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+let Constraints = "$src1 = $dst" in {
+ defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+ defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+ defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+ defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+ defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
@@ -2238,94 +2388,147 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
(PCMPGTDrm VR128:$src1, addr:$src2)>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
-// Pack instructions
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+ 0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+ 0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+ 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+ PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+ (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2
+ (bc_frag (memopv2i64 addr:$src1)),
+ (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
-// Shuffle and unpack instructions
-let AddedComplexity = 5 in {
-def PSHUFDri : PDIi8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- VR128:$src1, (undef))))]>;
-def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32 (memopv2i64 addr:$src1)),
- (undef))))]>;
-}
-
-// SSE2 with ImmT == Imm8 and XS prefix.
-def PSHUFHWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
- (undef))))]>,
- XS, Requires<[HasSSE2]>;
-def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
- XS, Requires<[HasSSE2]>;
-
-// SSE2 with ImmT == Imm8 and XD prefix.
-def PSHUFLWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
- (undef))))]>,
- XD, Requires<[HasSSE2]>;
-def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
- XD, Requires<[HasSSE2]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ VEX;
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+ VEX;
-let Constraints = "$src1 = $dst" in {
- def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+ VEX;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+ PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (unp_frag VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))]>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
@@ -2338,39 +2541,12 @@ let Constraints = "$src1 = $dst" in {
(v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2))))]>;
- def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
@@ -2384,102 +2560,117 @@ let Constraints = "$src1 = $dst" in {
(memopv2i64 addr:$src2))))]>;
}
-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, OpSize, VEX;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
- def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1,
- GR32:$src2, i32i8imm:$src3),
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
- def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- i16mem:$src2, i32i8imm:$src3),
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))]>;
-}
-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+  defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
-
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+  defm PINSRW : sse2_pinsrw, TB, OpSize;
} // ExeDomain = SSEPackedInt
-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
- TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in {
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-}
+let isAsmParserOnly = 1 in
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+} // ExeDomain = SSEPackedInt
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+let ExeDomain = SSEPackedInt in {
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
+let isAsmParserOnly = 1 in {
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+}
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- // FIXME: Change encoding to pseudo.
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ VEX;
+}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2489,6 +2680,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+// Move Int Doubleword to Single Scalar
+let isAsmParserOnly = 1 in {
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX;
+}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2497,20 +2700,18 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
-// SSE2 instructions with XS prefix
-def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- Requires<[HasSSE2]>;
-def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
+// Move Packed Doubleword Int to Doubleword Int
+let isAsmParserOnly = 1 in {
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2520,6 +2721,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+// Move Scalar Single to Double Int
+let isAsmParserOnly = 1 in {
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -2527,25 +2737,38 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
-// Store / copy lower 64-bits of a XMM register.
-def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-
// movd / movq to XMM register zero-extends
+let AddedComplexity = 15, isAsmParserOnly = 1 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>,
+ VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ VEX, VEX_W;
+}
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector GR32:$src)))))]>;
-// This is X86-64 only.
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))]>;
}
let AddedComplexity = 20 in {
+let isAsmParserOnly = 1 in
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>,
+ VEX;
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2558,13 +2781,63 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
+}
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Store / copy lower 64-bits of a XMM register.
+let isAsmParserOnly = 1 in
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+let AddedComplexity = 20 in {
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>, XS,
- Requires<[HasSSE2]>;
+ (loadi64 addr:$src))))))]>,
+ XS, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
@@ -2575,12 +2848,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -2592,49 +2876,136 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
(MOVZPQILo2PQIrm addr:$src)>;
}
+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases of movd with a 64-bit operand
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let isAsmParserOnly = 1 in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, XS;
//===---------------------------------------------------------------------===//
-// SSE3 Instructions
+// SSE2 - Misc Instructions
//===---------------------------------------------------------------------===//
-// Move Instructions
-def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movshdup
- VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movshdup
- (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+ (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+ (i8 1)), (MFENCE)>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ // FIXME: Change encoding to pseudo.
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movsldup
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
+
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (rep_frag
VR128:$src, (undef))))]>;
-def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movsldup
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (rep_frag
(memopv4f32 addr:$src), (undef)))]>;
+}
-def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movddup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
-def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movddup\t{$src, $dst|$dst, $src}",
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
+
+// Replicate Double FP
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
(undef))))]>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+
+// Move Unaligned Integer
+let isAsmParserOnly = 1 in
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+// Several Move patterns
let AddedComplexity = 5 in {
def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@@ -2646,52 +3017,98 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
}
-// Arithmetic
-let Constraints = "$src1 = $dst" in {
- def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "addsubps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- VR128:$src2))]>;
- def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "addsubps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- (memop addr:$src2)))]>;
- def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "addsubpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- VR128:$src2))]>;
- def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "addsubpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- (memop addr:$src2)))]>;
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src2))]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (Int VR128:$src1,
+ (memop addr:$src2)))]>;
+
}
-def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX],
+ ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
+ VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 Instructions
+//===---------------------------------------------------------------------===//
// Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+}
+
let Constraints = "$src1 = $dst" in {
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
@@ -2703,35 +3120,14 @@ let Constraints = "$src1 = $dst" in {
def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
-// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
- [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <1, 1, 3, 3>
-let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
- (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <0, 0, 2, 2>
-let AddedComplexity = 15 in
- def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
- (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
- def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
//===---------------------------------------------------------------------===//
-// SSSE3 Instructions
+// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
-multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, PatFrag mem_frag128,
+ Intrinsic IntId64, Intrinsic IntId128> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -2739,7 +3135,7 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
- (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;
+ (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
@@ -2752,240 +3148,203 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
+ (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
-/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
-multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64
- (bitconvert (memopv4i16 addr:$src))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>, VEX;
+}
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
- OpSize;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>;
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
-multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, PatFrag mem_frag128,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
+ (ins VR64:$src1, VR64:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64
- (bitconvert (memopv2i32 addr:$src))))]>;
+ (ins VR64:$src1, i64mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+ let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
- OpSize;
-
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
-
-/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let isCommutable = 0 in {
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-
-/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv4i16 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv8i16 addr:$src2))))]>, OpSize;
- }
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
-/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv2i32 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv4i32 addr:$src2))))]>, OpSize;
- }
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128>;
}
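
As a rough illustration of what the horizontal-add, sign and rounded-multiply forms defined above compute, here is a minimal C sketch written against the SSSE3 intrinsics from tmmintrin.h (which intrinsic maps to which rr/rm def here is the assumption; the intrinsics themselves are standard):

#include <tmmintrin.h>

// Sketch: a few of the SSSE3 binary operations from the defm's above.
__m128i ssse3_demo(__m128i a, __m128i b) {
  __m128i h = _mm_hadd_epi16(a, b);   // phaddw: pairwise sums of adjacent words
  __m128i s = _mm_sign_epi16(h, b);   // psignw: negate/zero/keep h lanes by the sign of b
  return _mm_mulhrs_epi16(s, a);      // pmulhrsw: rounded, scaled high multiply
}
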
-let ImmT = NoImm in { // None of these have i8 immediate fields.
-defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
- int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128>;
-defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd",
- int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128>;
-defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
- int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128>;
-defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw",
- int_x86_ssse3_phsub_w,
- int_x86_ssse3_phsub_w_128>;
-defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd",
- int_x86_ssse3_phsub_d,
- int_x86_ssse3_phsub_d_128>;
-defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw",
- int_x86_ssse3_phsub_sw,
- int_x86_ssse3_phsub_sw_128>;
-defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128>;
-defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
- int_x86_ssse3_pmul_hr_sw,
- int_x86_ssse3_pmul_hr_sw_128, 1>;
-
-defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
- int_x86_ssse3_pshuf_b,
- int_x86_ssse3_pshuf_b_128>;
-defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb",
- int_x86_ssse3_psign_b,
- int_x86_ssse3_psign_b_128>;
-defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw",
- int_x86_ssse3_psign_w,
- int_x86_ssse3_psign_w_128>;
-defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
- int_x86_ssse3_psign_d,
- int_x86_ssse3_psign_d_128>;
-}
-
-// palignr patterns.
-let Constraints = "$src1 = $dst" in {
- def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>;
- def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>;
-
- def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, OpSize;
- def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, OpSize;
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
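
For context, a small C sketch of the byte-shuffle semantics these X86pshufb patterns select for, using the SSSE3 intrinsic from tmmintrin.h; whether the register or memory form is chosen depends on where the mask comes from:

#include <tmmintrin.h>

// Sketch: reverse the 16 bytes of v with pshufb.
__m128i reverse_bytes(__m128i v) {
  const __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);
  return _mm_shuffle_epi8(v, mask);   // result byte i = v[mask[i] & 0x0f]
}
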
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_palign<string asm, bit Is2Addr = 1> {
+ def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>;
+ def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>;
+
+ def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+ def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PALIGN : sse3_palign<"palignr">;
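
A brief C sketch of the palignr semantics behind these definitions, via the tmmintrin.h intrinsic (the choice of the 64-bit or 128-bit def is up to instruction selection):

#include <tmmintrin.h>

// Sketch: concatenate {hi:lo} and take 16 bytes starting at byte offset 5.
__m128i align5(__m128i hi, __m128i lo) {
  return _mm_alignr_epi8(hi, lo, 5);  // palignr $5, lo, hi
}
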
+
let AddedComplexity = 5 in {
def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
@@ -2996,10 +3355,6 @@ def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
(PALIGNR64rr VR64:$src2, VR64:$src1,
(SHUFFLE_get_palign_imm VR64:$src3))>,
Requires<[HasSSSE3]>;
-def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
(PALIGNR64rr VR64:$src2, VR64:$src1,
(SHUFFLE_get_palign_imm VR64:$src3))>,
@@ -3027,10 +3382,15 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
Requires<[HasSSSE3]>;
}
-def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
-def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+//===---------------------------------------------------------------------===//
+// SSSE3 Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Thread synchronization
+def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
+ [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
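
The usual C entry points for this pair are _mm_monitor/_mm_mwait from pmmintrin.h. A rough sketch follows; note that on most operating systems MONITOR/MWAIT are only executable at ring 0, so this is illustrative rather than something a user-mode program can run as-is:

#include <pmmintrin.h>

// Sketch: wait for a store to *flag using the address-monitoring pair.
void wait_for_flag(volatile int *flag) {
  while (*flag == 0) {
    _mm_monitor((const void *)flag, 0, 0);  // monitor: arm the address monitor
    if (*flag == 0)
      _mm_mwait(0, 0);                      // mwait: sleep until the monitored line is written
  }
}
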
//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -3311,287 +3671,9 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
//===----------------------------------------------------------------------===//
-// SSE4.1 Instructions
+// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
-
- // Vector intrinsic operation, reg
- def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Vector intrinsic operation, mem
- def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
- OpSize;
-}
-
-let Constraints = "$src1 = $dst" in {
-multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr,
- Intrinsic F32Int,
- Intrinsic F64Int> {
- // Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
-
- // Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- OpSize;
-
- // Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
-
- // Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- OpSize;
-}
-}
-
-// FP round - roundss, roundps, roundsd, roundpd
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
-defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
-
-// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
-multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128> {
- def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
- def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
-
-defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
- int_x86_sse41_phminposuw>;
-
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
-}
-
-defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
- int_x86_sse41_pcmpeqq, 1>;
-defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
- int_x86_sse41_packusdw, 0>;
-defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
- int_x86_sse41_pminsb, 1>;
-defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
- int_x86_sse41_pminsd, 1>;
-defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
- int_x86_sse41_pminud, 1>;
-defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
- int_x86_sse41_pminuw, 1>;
-defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
- int_x86_sse41_pmaxsb, 1>;
-defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
- int_x86_sse41_pmaxsd, 1>;
-defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
- int_x86_sse41_pmaxud, 1>;
-defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
- int_x86_sse41_pmaxuw, 1>;
-
-defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
-
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
-
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
- SDNode OpNode, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode (OpVT VR128:$src1),
- VR128:$src2))]>, OpSize {
- let isCommutable = Commutable;
- }
- def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
- def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, (memop addr:$src2)))]>,
- OpSize;
- }
-}
-
-/// SS48I_binop_rm - Simple SSE41 binary operator.
-let Constraints = "$src1 = $dst" in {
-multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>,
- OpSize;
-}
-}
-
-defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
-
-/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
- OpSize;
- }
-}
-
-defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
- int_x86_sse41_blendps, 0>;
-defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
- int_x86_sse41_blendpd, 0>;
-defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
- int_x86_sse41_pblendw, 0>;
-defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
- int_x86_sse41_dpps, 1>;
-defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
- int_x86_sse41_dppd, 1>;
-defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
- int_x86_sse41_mpsadbw, 0>;
-
-
-/// SS41I_ternary_int - SSE 4.1 ternary operator
-let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
- def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
- OpSize;
-
- def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
- [(set VR128:$dst,
- (IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
- }
-}
-
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-
-
multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3604,6 +3686,21 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
+}
+
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
@@ -3655,6 +3752,17 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
+
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
@@ -3685,6 +3793,12 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
+}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
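
A short C sketch of the packed sign/zero extension these pmovsx/pmovzx defs provide, using the smmintrin.h intrinsics (which def each intrinsic lowers to is the assumption):

#include <smmintrin.h>

// Sketch: widen packed integers with sign or zero extension.
void widen(__m128i bytes, __m128i *words, __m128i *quads) {
  *words = _mm_cvtepi8_epi16(bytes);  // pmovsxbw: low 8 bytes -> 8 sign-extended words
  *quads = _mm_cvtepu8_epi64(bytes);  // pmovzxbq: low 2 bytes -> 2 zero-extended qwords
}
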
@@ -3699,6 +3813,9 @@ def : Pat<(int_x86_sse41_pmovzxbq
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
@@ -3718,6 +3835,9 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -3733,6 +3853,9 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -3752,8 +3875,31 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
+
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
/// SS41I_extractf32 - SSE 4.1 extract a 32-bit fp value to an int reg or memory
/// destination
@@ -3773,6 +3919,8 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -3782,78 +3930,530 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
Requires<[HasSSE41]>;
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, OpSize;
- }
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
}
-defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
- OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, OpSize;
- }
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize;
}
-defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+let Constraints = "$src1 = $dst" in
+ defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
// insertps has a few different modes; the first two below are optimized inserts
// that won't zero arbitrary elements in the destination vector. The next one
// matches the intrinsic and could zero arbitrary elements in the target vector.
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1,
- (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>, OpSize;
- }
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
}
-defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
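
To make the imm8 behaviour described above concrete, here is a hedged C sketch using the smmintrin.h intrinsic (the imm encoding summarized in the comment is the usual one: imm[7:6] selects the source lane, imm[5:4] the destination lane, imm[3:0] is the zero mask):

#include <smmintrin.h>

// Sketch: insertps with imm = 0x10 copies element 0 of b into element 1 of a,
// without zeroing any other lane.
__m128 insert_lane1(__m128 a, __m128 b) {
  return _mm_insert_ps(a, b, 0x10);
}
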
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr,
+ Intrinsic V4F32Int,
+ Intrinsic V2F64Int> {
+ // Vector intrinsic operation, reg
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : Ii8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ TA, OpSize,
+ Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr> {
+ // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm : Ii8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TA, OpSize, Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+}
+
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd,
+ 0>, VEX_4V;
+ // Instructions for the assembler
+ defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
+ defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+}
+
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
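
A minimal C sketch of the packed round operation these defs implement, using the smmintrin.h intrinsic and its standard rounding-control flags:

#include <smmintrin.h>

// Sketch: round each lane to the nearest integer value, suppressing exceptions.
__m128 round_nearest(__m128 v) {
  return _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
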
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
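
A short C sketch of what phminposuw returns, via the smmintrin.h intrinsic (the result layout described in the comment is the architectural one; the mapping to the rr128/rm128 defs above is left to selection):

#include <smmintrin.h>

// Sketch: phminposuw places the minimum unsigned word in lane 0 and its
// original index in bits [18:16] of the result.
unsigned min_word_index(__m128i v) {
  __m128i r = _mm_minpos_epu16(v);
  return ((unsigned)_mm_cvtsi128_si32(r) >> 16) & 0x7;
}
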
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+ 0>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+ 0>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+ 0>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+ 0>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+ 0>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+ 0>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+ 0>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+ 0>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+ 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
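
For reference, a hedged C sketch exercising a few of the binary SSE4.1 operations defined above through their smmintrin.h intrinsics:

#include <smmintrin.h>

// Sketch: packed min/max and the 64-bit equality compare.
__m128i clamp_and_compare(__m128i a, __m128i b) {
  __m128i lo = _mm_min_epi32(a, b);   // pminsd
  __m128i hi = _mm_max_epu16(a, b);   // pmaxuw
  return _mm_cmpeq_epi64(lo, hi);     // pcmpeqq, the operation the X86pcmpeqq patterns cover
}
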
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
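
A one-line C sketch of the operation the 'mul' pattern above maps to, using the smmintrin.h intrinsic:

#include <smmintrin.h>

// Sketch: pmulld is the full 32x32->32 lane-wise multiply.
__m128i mul_lanes(__m128i a, __m128i b) {
  return _mm_mullo_epi32(a, b);
}
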
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ let isCommutable = 0 in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ 0>, VEX_4V;
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ 0>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ 0>, VEX_4V;
+ }
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ 0>, VEX_4V;
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+ }
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+}
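
A hedged C sketch of two of the immediate-controlled operations defined above, using the smmintrin.h intrinsics (the immediate semantics in the comments are the architectural ones):

#include <smmintrin.h>

// Sketch: dpps with mask 0xF1 multiplies all four lanes, sums them, and places
// the sum in lane 0; blendps with imm 0x5 takes lanes 0 and 2 from b.
__m128 dot_and_blend(__m128 a, __m128 b) {
  __m128 dot = _mm_dp_ps(a, b, 0xF1);
  return _mm_blend_ps(dot, b, 0x5);
}
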
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
+ def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ }
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
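
A small C sketch of the variable blend these defs provide, via the smmintrin.h intrinsic; the implicit XMM0 mask operand is the reason the non-VEX definitions above pin Uses = [XMM0]:

#include <smmintrin.h>

// Sketch: blendvps selects the lane of b where the sign bit of mask is set, else a.
__m128 select_lanes(__m128 a, __m128 b, __m128 mask) {
  return _mm_blendv_ps(a, b, mask);
}
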
+
// ptest instruction - we lower the corresponding Intel intrinsic to this in
// X86ISelLowering.
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+ OpSize, VEX;
+}
+
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
@@ -3865,43 +4465,207 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
OpSize;
}
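
A minimal C sketch of how ptest is typically reached, assuming the smmintrin.h test intrinsic is what gets lowered here:

#include <smmintrin.h>

// Sketch: ptest sets ZF when (a & b) is all zeros; _mm_testz_si128 reads ZF.
int is_disjoint(__m128i a, __m128i b) {
  return _mm_testz_si128(a, b);
}
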
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize, VEX;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize;
-
//===----------------------------------------------------------------------===//
-// SSE4.2 Instructions
+// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
+ 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
+//===----------------------------------------------------------------------===//
+// SSE4.2 - String/text Processing Instructions
+//===----------------------------------------------------------------------===//
+
+// Packed Compare Implicit Length Strings, Return Mask
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "#PCMPISTRM128rr PSEUDO!",
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>, OpSize;
+ def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "#PCMPISTRM128rm PSEUDO!",
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
+ VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
+ Predicates = [HasAVX] in {
+ def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+ def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "#PCMPESTRM128rr PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
+
+ def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "#PCMPESTRM128rm PSEUDO!",
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
+ OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX],
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
+ VEX;
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
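
A hedged C sketch of the implicit-length, return-index form, using the nmmintrin.h intrinsic and its standard mode flags (which of the defs above a given compile ends up using is not asserted here):

#include <nmmintrin.h>

// Sketch: pcmpistri searching a 16-byte chunk for any byte of 'set';
// the returned index (ECX) is 16 when no byte matches.
int find_any_of(__m128i set, __m128i chunk) {
  return _mm_cmpistri(set, chunk,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                      _SIDD_LEAST_SIGNIFICANT);
}
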
+
+// Packed Compare Explicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
+ VEX;
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - CRC Instructions
+//===----------------------------------------------------------------------===//
+
+// No CRC instructions have AVX equivalents
+
// crc intrinsic instruction
// This set of instructions is rm-only; the only difference is the size
// of r and m.
@@ -3969,133 +4733,52 @@ let Constraints = "$src1 = $dst" in {
REX_W;
}
-// String/text processing instructions.
-let Defs = [EFLAGS], usesCustomInserter = 1 in {
-def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "#PCMPISTRM128rr PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
- imm:$src3))]>, OpSize;
-def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "#PCMPISTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2),
- imm:$src3))]>, OpSize;
-}
-
-let Defs = [XMM0, EFLAGS] in {
-def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
-def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
-}
+//===----------------------------------------------------------------------===//
+// AES-NI Instructions
+//===----------------------------------------------------------------------===//
-let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
-def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
-
-def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "#PCMPESTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
- OpSize;
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
-def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
-def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+// Perform One Round of an AES Encryption/Decryption Flow
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+ defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
+ int_x86_aesni_aesenc, 0>, VEX_4V;
+ defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
+ int_x86_aesni_aesenclast, 0>, VEX_4V;
+ defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
+ int_x86_aesni_aesdec, 0>, VEX_4V;
+ defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
+ int_x86_aesni_aesdeclast, 0>, VEX_4V;
}
-let Defs = [ECX, EFLAGS] in {
- multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
- def rm : SS42AI<0x63, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
- }
-}
-
-defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
-defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
-defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
-defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
-defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
-defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
-
-let Defs = [ECX, EFLAGS] in {
-let Uses = [EAX, EDX] in {
- multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
- def rr : SS42AI<0x61, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
- def rm : SS42AI<0x61, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX,
- (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
- }
-}
-}
-
-defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
-defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
-defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
-defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
-defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
-defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
-
-//===----------------------------------------------------------------------===//
-// AES-NI Instructions
-//===----------------------------------------------------------------------===//
-
let Constraints = "$src1 = $dst" in {
- multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+ defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+ defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+ defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+ defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
}
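For context (editorial sketch, not from the patch): the rr/rm patterns above are what a compiler-generated AES round lowers to. A minimal source-level example using the standard AES-NI intrinsics, assuming a host compiler invoked with -maes (data values are placeholders):

    #include <emmintrin.h>
    #include <wmmintrin.h>   // AES-NI intrinsics; compile with -maes

    // One middle round and the final round of an AES encryption flow; these map
    // onto the AESENC/AESENCLAST (or VAESENC/VAESENCLAST under AVX) patterns above.
    static __m128i MiddleRound(__m128i State, __m128i Key) { return _mm_aesenc_si128(State, Key); }
    static __m128i FinalRound (__m128i State, __m128i Key) { return _mm_aesenclast_si128(State, Key); }

    int main() {
      __m128i S = _mm_set1_epi32(0), K = _mm_set1_epi32(0);  // dummy state/key
      S = MiddleRound(S, K);
      S = FinalRound(S, K);
      (void)S;
      return 0;
    }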
-defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc>;
-defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast>;
-defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec>;
-defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast>;
-
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
(AESENCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
@@ -4113,13 +4796,27 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
+// Perform the AES InvMixColumn Transformation
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+ def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize, VEX;
+ def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize, VEX;
+}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc VR128:$src1))]>,
OpSize;
-
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
@@ -4127,6 +4824,22 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
OpSize;
+// AES Round Key Generation Assist
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+ def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize, VEX;
+ def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize, VEX;
+}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index a9681e6670fc..633ddd49d74d 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -30,7 +30,7 @@ class X86MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
bool Is64BitMode;
public:
- X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
+ X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
Is64BitMode = is64Bit;
}
@@ -38,17 +38,18 @@ public:
~X86MCCodeEmitter() {}
unsigned getNumFixupKinds() const {
- return 4;
+ return 5;
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[] = {
{ "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
};
-
+
if (Kind < FirstTargetFixupKind)
return MCCodeEmitter::getFixupKindInfo(Kind);
@@ -56,16 +57,38 @@ public:
"Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
-
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
-
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7 and the difference between the 2 groups is given by the REX prefix.
+ // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+ // in 1's complement form, example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of Intel AVX Programming Reference for details.
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+ (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+ SrcRegNum += 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
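As a quick illustration of the 1's complement rule described above (an editorial sketch, not part of the patch; the register numbers are assumed):

    #include <cassert>
    // Stand-alone model of the VEX.vvvv rule: the full 0-15 register number is
    // stored inverted in a 4-bit field.
    static unsigned char vexVVVV(unsigned RegNum) { return (~RegNum) & 0xf; }
    int main() {
      assert(vexVVVV(9)  == 0x6);  // XMM9/YMM9  -> ~9  & 0xF == 0b0110
      assert(vexVVVV(0)  == 0xf);  // XMM0       -> ~0  & 0xF == 0b1111
      assert(vexVVVV(15) == 0x0);  // XMM15      -> ~15 & 0xF == 0b0000
      return 0;
    }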
+
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
OS << (char)C;
++CurByte;
}
-
+
void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
raw_ostream &OS) const {
// Output the constant in little endian byte order.
@@ -75,38 +98,49 @@ public:
}
}
- void EmitImmediate(const MCOperand &Disp,
+ void EmitImmediate(const MCOperand &Disp,
unsigned ImmSize, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset = 0) const;
-
+
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
return RM | (RegOpcode << 3) | (Mod << 6);
}
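A quick numeric check of the mod/reg/rm packing above (illustrative values only):

    #include <cassert>
    int main() {
      // mod=3 (register-direct), reg/opcode=2, rm=1 -> binary 11 010 001 == 0xD1
      unsigned Mod = 3, RegOpcode = 2, RM = 1;
      assert((RM | (RegOpcode << 3) | (Mod << 6)) == 0xD1);
      return 0;
    }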
-
+
void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
unsigned &CurByte, raw_ostream &OS) const {
EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
}
-
+
void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
unsigned &CurByte, raw_ostream &OS) const {
// SIB byte is in the same format as the ModRMByte.
EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
}
-
-
+
+
void EmitMemModRMByte(const MCInst &MI, unsigned Op,
- unsigned RegOpcodeField,
- unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
-
+
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
-
+
+ void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const TargetInstrDesc &Desc,
+ raw_ostream &OS) const;
+
+ void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const;
+
+ void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const TargetInstrDesc &Desc,
+ raw_ostream &OS) const;
};
} // end anonymous namespace
@@ -124,24 +158,23 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
return new X86MCCodeEmitter(TM, Ctx, true);
}
-
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
static bool isDisp8(int Value) {
return Value == (signed char)Value;
}
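The truncation trick above is just a range check; a small editorial sketch, assuming the usual two's-complement narrowing behaviour:

    #include <cassert>
    int main() {
      // A displacement qualifies for disp8 iff it round-trips through signed
      // char, i.e. iff it lies in [-128, 127].
      assert(-128 == (signed char)-128);   // fits -> encode as disp8
      assert( 127 == (signed char)127);    // fits
      assert( 200 != (signed char)200);    // truncates (to -56 here) -> disp32
      return 0;
    }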
/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
/// in an instruction with the specified TSFlags.
-static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
unsigned Size = X86II::getSizeOfImm(TSFlags);
bool isPCRel = X86II::isImmPCRel(TSFlags);
-
+
switch (Size) {
default: assert(0 && "Unknown immediate size");
case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
+ case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
- case 2: assert(!isPCRel); return FK_Data_2;
case 8: assert(!isPCRel); return FK_Data_8;
}
}
@@ -162,29 +195,30 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
const MCExpr *Expr = DispOp.getExpr();
-
+
// If the fixup is pc-relative, we need to bias the value to be relative to
// the start of the field, not the end of the field.
if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
ImmOffset -= 4;
+ if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte))
+ ImmOffset -= 2;
if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
ImmOffset -= 1;
-
+
if (ImmOffset)
Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
Ctx);
-
+
// Emit a symbolic constant as a fixup and 4 zeros.
Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
EmitConstant(0, Size, CurByte, OS);
}
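One way to see the biasing above (an editorial example with made-up offsets): the relocation is resolved at the start of the immediate field, but the CPU computes the target from the end of it, so the emitter pre-subtracts the field size.

    #include <cassert>
    int main() {
      // Assume a 4-byte pc-relative field starting at offset 10 and a branch
      // target at offset 30 within the same fragment.
      int FieldStart = 10, FieldSize = 4, Target = 30;
      // What the hardware needs stored: target relative to the end of the field.
      int NeededDisp = Target - (FieldStart + FieldSize);    // 16
      // What a fixup resolved at the field start yields after the -4 bias.
      int FixupResult = (Target - FieldStart) - FieldSize;   // 16
      assert(NeededDisp == FixupResult);
      return 0;
    }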
-
void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
- unsigned TSFlags, unsigned &CurByte,
+ uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const{
const MCOperand &Disp = MI.getOperand(Op+3);
@@ -192,43 +226,43 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
const MCOperand &Scale = MI.getOperand(Op+1);
const MCOperand &IndexReg = MI.getOperand(Op+2);
unsigned BaseReg = Base.getReg();
-
+
// Handle %rip relative addressing.
if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
- assert(IndexReg.getReg() == 0 && Is64BitMode &&
- "Invalid rip-relative address");
+ assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode");
+ assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
-
+
unsigned FixupKind = X86::reloc_riprel_4byte;
-
+
// movq loads are handled with a special relocation form which allows the
// linker to eliminate some loads for GOT references which end up in the
// same linkage unit.
if (MI.getOpcode() == X86::MOV64rm ||
MI.getOpcode() == X86::MOV64rm_TC)
FixupKind = X86::reloc_riprel_4byte_movq_load;
-
+
// rip-relative addressing is actually relative to the *next* instruction.
// Since an immediate can follow the mod/rm byte for an instruction, this
// means that we need to bias the immediate field of the instruction with
// the size of the immediate field. If we have this case, add it into the
// expression to emit.
int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
-
+
EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
CurByte, OS, Fixups, -ImmSize);
return;
}
-
+
unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
-
+
// Determine whether a SIB byte is needed.
- // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
// resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
// 2-7) and absolute references.
if (// The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
+ IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
// present.
@@ -242,7 +276,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
-
+
// If the base is not EBP/ESP and there is no displacement, use simple
// indirect register encoding, this handles addresses like [EAX]. The
// encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -251,24 +285,24 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
return;
}
-
+
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm() && isDisp8(Disp.getImm())) {
EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
-
+
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
-
+
// We need a SIB byte, so start by outputting the ModR/M byte first
assert(IndexReg.getReg() != X86::ESP &&
IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
+
bool ForceDisp32 = false;
bool ForceDisp8 = false;
if (BaseReg == 0) {
@@ -294,13 +328,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Emit the normal disp32 encoding.
EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
}
-
+
// Calculate what the SS field value should be...
static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
unsigned SS = SSTable[Scale.getImm()];
-
+
if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base, see Intel
+ // Handle the SIB byte for the case where there is no base, see Intel
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
@@ -316,7 +350,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
}
-
+
// Do we need to output a displacement?
if (ForceDisp8)
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
@@ -324,26 +358,216 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
}
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const TargetInstrDesc &Desc,
+ raw_ostream &OS) const {
+ bool HasVEX_4V = false;
+ if (TSFlags & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+ // VEX_R: opcode extension equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (used like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ //
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if (TSFlags & X86II::VEX_W)
+ VEX_W = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::TF: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+ // Set the vector length to 256-bit if any of YMM0-YMM15 is used
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = 0;
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem:
+ NumOps = CurOp = X86::AddrNumOperands;
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcReg:
+ if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+
+ // CurOp and NumOps are equal when VEX_R represents a register used
+ // to index a memory destination (which is the last operand)
+ CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+ }
+
+ // If the last register should be encoded in the immediate field,
+ // do not use any bits from the VEX prefix for this register; ignore it here
+ if (TSFlags & X86II::VEX_I8IMM)
+ NumOps--;
+
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_B = 0x0;
+ if (!VEX_B && MO.isReg() &&
+ ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_X = 0x0;
+ }
+ break;
+ default: // MRMDestReg, MRM0r-MRM7r
+ if (MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+ CurOp++;
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && !HasVEX_4V &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ }
+ break;
+ assert(0 && "Not implemented!");
+ }
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ EmitByte(0xC5, CurByte, OS);
+ EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ return;
+ }
+
+ // 3 byte VEX prefix
+ EmitByte(0xC4, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
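To make the byte layout sketched above concrete, here is an editorial, stand-alone rendering of the same packing with assumed field values (it is not wired into the emitter; values are illustrative):

    #include <cstdio>
    int main() {
      // Assumed fields: no extended registers (R=X=B=1), W=0, implied 0F map
      // (m-mmmmm = 1), vvvv unused (1111), 128-bit (L=0), 66 prefix (pp=01).
      unsigned VEX_R = 1, VEX_X = 1, VEX_B = 1, VEX_W = 0;
      unsigned VEX_5M = 1, VEX_4V = 0xf, VEX_L = 0, VEX_PP = 1;
      unsigned LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
      if (VEX_B && VEX_X && !VEX_W && VEX_5M == 1) {
        // Short form is possible: C5 <R|vvvv|L|pp>
        printf("C5 %02X\n", LastByte | (VEX_R << 7));        // prints "C5 F9"
      } else {
        // Long form: C4 <R|X|B|m-mmmmm> <W|vvvv|L|pp>
        printf("C4 %02X %02X\n",
               VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M,
               LastByte | (VEX_W << 7));
      }
      return 0;
    }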
+
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
-static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const TargetInstrDesc &Desc) {
- // Pseudo instructions never have a rex byte.
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
-
unsigned REX = 0;
if (TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
+ REX |= 1 << 3; // set REX.W
+
if (MI.getNumOperands() == 0) return REX;
-
+
unsigned NumOps = MI.getNumOperands();
// FIXME: MCInst should explicitize the two-addrness.
bool isTwoAddr = NumOps > 1 &&
Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
-
+
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
unsigned i = isTwoAddr ? 1 : 0;
for (; i != NumOps; ++i) {
@@ -353,34 +577,34 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue;
// FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
// that returns non-zero.
- REX |= 0x40;
+ REX |= 0x40; // REX fixed encoding prefix
break;
}
-
+
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
case X86II::MRMSrcReg:
if (MI.getOperand(0).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << 0;
+ REX |= 1 << 0; // set REX.B
}
break;
case X86II::MRMSrcMem: {
if (MI.getOperand(0).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << Bit;
+ REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
}
@@ -391,17 +615,17 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
i = isTwoAddr ? 1 : 0;
if (NumOps > e && MI.getOperand(e).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
for (; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << Bit;
+ REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
}
@@ -410,39 +634,40 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
default:
if (MI.getOperand(0).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
- REX |= 1 << 0;
+ REX |= 1 << 0; // set REX.B
i = isTwoAddr ? 2 : 1;
for (unsigned e = NumOps; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
}
break;
}
return REX;
}
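For reference, the bit positions OR'd together above land in the low nibble of the REX prefix (0100WRXB); a tiny editorial check with assumed bits:

    #include <cassert>
    int main() {
      unsigned REX = 0;
      REX |= 1 << 3;  // REX.W: 64-bit operand size
      REX |= 1 << 2;  // REX.R: ModRM.reg names an extended register
      // The caller then emits 0x40 | REX whenever the result is non-zero.
      assert((0x40 | REX) == 0x4C);
      return 0;
    }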
-void X86MCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
- unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = TII.get(Opcode);
- unsigned TSFlags = Desc.TSFlags;
-
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
-
- // FIXME: We should emit the prefixes in exactly the same order as GAS does,
- // in order to provide diffability.
-
- // Emit the lock opcode prefix as needed.
- if (TSFlags & X86II::LOCK)
- EmitByte(0xF0, CurByte, OS);
-
- // Emit segment override opcode prefix as needed.
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
+ unsigned &CurByte, int MemOperand,
+ const MCInst &MI,
+ raw_ostream &OS) const {
switch (TSFlags & X86II::SegOvrMask) {
default: assert(0 && "Invalid segment!");
- case 0: break; // No segment override!
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: assert(0 && "Unknown segment register!");
+ case 0: break;
+ case X86::CS: EmitByte(0x2E, CurByte, OS); break;
+ case X86::SS: EmitByte(0x36, CurByte, OS); break;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); break;
+ case X86::ES: EmitByte(0x26, CurByte, OS); break;
+ case X86::FS: EmitByte(0x64, CurByte, OS); break;
+ case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ }
+ }
+ break;
case X86II::FS:
EmitByte(0x64, CurByte, OS);
break;
@@ -450,19 +675,36 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(0x65, CurByte, OS);
break;
}
-
+}
+
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+///
+/// MemOperand is the operand # of the start of a memory operand if present. If
+/// not present, it is -1.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const TargetInstrDesc &Desc,
+ raw_ostream &OS) const {
+
+ // Emit the lock opcode prefix as needed.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
// Emit the repeat opcode prefix as needed.
if ((TSFlags & X86II::Op0Mask) == X86II::REP)
EmitByte(0xF3, CurByte, OS);
-
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
-
+
// Emit the address size opcode prefix as needed.
if (TSFlags & X86II::AdSize)
EmitByte(0x67, CurByte, OS);
-
+
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
@@ -494,18 +736,18 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::DE: EmitByte(0xDE, CurByte, OS); break;
case X86II::DF: EmitByte(0xDF, CurByte, OS); break;
}
-
+
// Handle REX prefix.
// FIXME: Can this come before F2 etc to simplify emission?
if (Is64BitMode) {
if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
EmitByte(0x40 | REX, CurByte, OS);
}
-
+
// 0x0F escape code must be emitted just before the opcode.
if (Need0FPrefix)
EmitByte(0x0F, CurByte, OS);
-
+
// FIXME: Pull this up into previous switch if REX can be moved earlier.
switch (TSFlags & X86II::Op0Mask) {
case X86II::TF: // F2 0F 38
@@ -516,8 +758,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(0x3A, CurByte, OS);
break;
}
-
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
// If this is a two-address instruction, skip one of the register operands.
+ // FIXME: This should be handled during MCInst lowering.
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = 0;
if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1)
@@ -525,56 +780,85 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
// Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
--NumOps;
-
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = false;
+
+ // Does it use the VEX.VVVV field?
+ bool HasVEX_4V = false;
+
+ if (TSFlags & X86II::VEX)
+ HasVEXPrefix = true;
+ if (TSFlags & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ else
+ EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+ unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
- case X86II::Pseudo: return; // Pseudo instructions encode to nothing.
+ case X86II::Pseudo:
+ assert(0 && "Pseudo instruction shouldn't be emitted");
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
-
+
case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
break;
-
+
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp),
GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
CurOp += 2;
break;
-
+
case X86II::MRMDestMem:
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp,
- GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)),
+ GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)),
TSFlags, CurByte, OS, Fixups);
- CurOp += X86AddrNumOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
break;
-
+
case X86II::MRMSrcReg:
EmitByte(BaseOpcode, CurByte, OS);
- EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)),
- CurByte, OS);
- CurOp += 2;
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
-
+
case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+
EmitByte(BaseOpcode, CurByte, OS);
- // FIXME: Maybe lea should have its own form? This is a horrible hack.
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
-
- EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)),
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
break;
@@ -584,6 +868,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp++),
(TSFlags & X86II::FormMask)-X86II::MRM0r,
@@ -596,7 +882,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
TSFlags, CurByte, OS, Fixups);
- CurOp += X86AddrNumOperands;
+ CurOp += X86::AddrNumOperands;
break;
case X86II::MRM_C1:
EmitByte(BaseOpcode, CurByte, OS);
@@ -639,14 +925,27 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(0xF9, CurByte, OS);
break;
}
-
+
// If there is a remaining operand, it must be a trailing immediate. Emit it
// according to the right size for the instruction.
- if (CurOp != NumOps)
- EmitImmediate(MI.getOperand(CurOp++),
- X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
-
+ if (CurOp != NumOps) {
+ // The last source register of a 4-operand instruction in AVX is encoded
+ // in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
+ if (TSFlags & X86II::VEX_I8IMM) {
+ const MCOperand &MO = MI.getOperand(CurOp++);
+ bool IsExtReg =
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+ unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
+ RegNum |= GetX86RegNum(MO) << 4;
+ EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
+ Fixups);
+ } else
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ }
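To illustrate the imm8 register encoding just described (editorial sketch; the register number is assumed):

    #include <cassert>
    int main() {
      // Encode XMM12 into the immediate of a 4-operand AVX instruction:
      // bit 7 flags an extended register, bits[6:4] carry the low register bits.
      unsigned RegNo = 12;
      unsigned Imm = (RegNo >= 8 ? (1u << 7) : 0) | ((RegNo & 7) << 4);
      assert(Imm == 0xC0);   // equivalently RegNo << 4, with bits[3:0] left zero
      return 0;
    }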
+
+
#ifndef NDEBUG
// FIXME: Verify.
if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 98975ea01bae..5f31e00ebabd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
return RegNo-X86::ST0;
- case X86::XMM0: case X86::XMM8: case X86::MM0:
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
return 0;
- case X86::XMM1: case X86::XMM9: case X86::MM1:
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
return 1;
- case X86::XMM2: case X86::XMM10: case X86::MM2:
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
return 2;
- case X86::XMM3: case X86::XMM11: case X86::MM3:
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
return 3;
- case X86::XMM4: case X86::XMM12: case X86::MM4:
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
return 4;
- case X86::XMM5: case X86::XMM13: case X86::MM5:
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
return 5;
- case X86::XMM6: case X86::XMM14: case X86::MM6:
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
return 6;
- case X86::XMM7: case X86::XMM15: case X86::MM7:
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
return 7;
case X86::ES:
@@ -157,6 +165,34 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::GS:
return 5;
+ case X86::CR0:
+ return 0;
+ case X86::CR1:
+ return 1;
+ case X86::CR2:
+ return 2;
+ case X86::CR3:
+ return 3;
+ case X86::CR4:
+ return 4;
+
+ case X86::DR0:
+ return 0;
+ case X86::DR1:
+ return 1;
+ case X86::DR2:
+ return 2;
+ case X86::DR3:
+ return 3;
+ case X86::DR4:
+ return 4;
+ case X86::DR5:
+ return 5;
+ case X86::DR6:
+ return 6;
+ case X86::DR7:
+ return 7;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -357,56 +393,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
}
-const TargetRegisterClass* const*
-X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- bool callsEHReturn = false;
- if (MF)
- callsEHReturn = MF->getMMI().callsEHReturn();
-
- static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
- &X86::GR32RegClass, &X86::GR32RegClass,
- &X86::GR32RegClass, &X86::GR32RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
- &X86::GR32RegClass, &X86::GR32RegClass,
- &X86::GR32RegClass, &X86::GR32RegClass,
- &X86::GR32RegClass, &X86::GR32RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass, 0
- };
-
- if (Is64Bit) {
- if (IsWin64)
- return CalleeSavedRegClassesWin64;
- else
- return (callsEHReturn ?
- CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
- } else {
- return (callsEHReturn ?
- CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
- }
-}
-
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
// Set the stack-pointer register and its aliases as reserved.
@@ -696,8 +682,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// }
// [EBP]
MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta,
- true, false);
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
}
if (hasFP(MF)) {
@@ -710,7 +695,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-(int)SlotSize +
TFI.getOffsetOfLocalArea() +
TailCallReturnAddrDelta,
- true, false);
+ true);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
@@ -1240,8 +1225,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
if (CSSize) {
unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
MachineInstr *MI =
- addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
MBB.insert(MBBI, MI);
} else {
BuildMI(MBB, MBBI, DL,
@@ -1301,9 +1286,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
for (unsigned i = 0; i != 5; ++i)
MIB.addOperand(MBBI->getOperand(i));
} else if (RetOpcode == X86::TCRETURNri64) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
} else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = prior(MBBI);
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index d0b82e2f1ce1..d852bcd2011c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -105,12 +105,6 @@ public:
/// callee-save registers on this target.
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred
- /// register classes to spill each callee-saved register with. The order and
- /// length of this list match the getCalleeSavedRegs() list.
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
/// should be considered unavailable at all times, e.g. SP, RA. This is used by
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 91cfaa977eb5..9f0382e3fae9 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -147,7 +147,7 @@ let Namespace = "X86" in {
def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
-
+
// Pseudo Floating Point registers
def FP0 : Register<"fp0">;
def FP1 : Register<"fp1">;
@@ -155,7 +155,7 @@ let Namespace = "X86" in {
def FP3 : Register<"fp3">;
def FP4 : Register<"fp4">;
def FP5 : Register<"fp5">;
- def FP6 : Register<"fp6">;
+ def FP6 : Register<"fp6">;
// XMM Registers, used by the various SSE instruction set extensions.
// The sub_ss and sub_sd subregs are the same registers with another regclass.
@@ -357,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
}];
}
-def GR32 : RegisterClass<"X86", [i32], 32,
+def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
@@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
-def GR64 : RegisterClass<"X86", [i64], 64,
+def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
@@ -446,7 +446,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
}
// Debug registers.
-def DEBUG_REG : RegisterClass<"X86", [i32], 32,
+def DEBUG_REG : RegisterClass<"X86", [i32], 32,
[DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> {
}
@@ -780,14 +780,14 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32,
}
// Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
-
+
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -803,11 +803,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}
}];
}
-def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
+
+def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
+
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VR256Class::iterator
+ VR256Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
}
// Status flags registers.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 09a26858eb7e..4a10be518f03 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -53,9 +53,12 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDLLImportLinkage())
return X86II::MO_DLLIMPORT;
- // Materializable GVs (in JIT lazy compilation mode) do not require an
- // extra load from stub.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
// X86-64 in PIC mode.
if (isPICStyleRIPRel()) {
@@ -293,12 +296,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
- , DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
- , Is64Bit(is64Bit)
- , TargetType(isELF) { // Default to ELF unless otherwise specified.
+ , TargetTriple(TT)
+ , Is64Bit(is64Bit) {
// default to hard float ABI
if (FloatABIType == FloatABI::Default)
@@ -328,47 +330,40 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
HasCMov = true;
}
-
DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- if (TT.length() > 5) {
- size_t Pos;
- if ((Pos = TT.find("-darwin")) != std::string::npos) {
- TargetType = isDarwin;
-
- // Compute the darwin version number.
- if (isdigit(TT[Pos+7]))
- DarwinVers = atoi(&TT[Pos+7]);
- else
- DarwinVers = 8; // Minimum supported darwin is Tiger.
- } else if (TT.find("linux") != std::string::npos) {
- // Linux doesn't imply ELF, but we don't currently support anything else.
- TargetType = isELF;
- } else if (TT.find("cygwin") != std::string::npos) {
- TargetType = isCygwin;
- } else if (TT.find("mingw") != std::string::npos) {
- TargetType = isMingw;
- } else if (TT.find("win32") != std::string::npos) {
- TargetType = isWindows;
- } else if (TT.find("windows") != std::string::npos) {
- TargetType = isWindows;
- } else if (TT.find("-cl") != std::string::npos) {
- TargetType = isDarwin;
- DarwinVers = 9;
- }
- }
-
// Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
// bit targets.
- if (TargetType == isDarwin || Is64Bit)
+ if (isTargetDarwin() || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
stackAlignment = StackAlignment;
}
+
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86Subtarget::IsCalleePop(bool IsVarArg,
+ CallingConv::ID CallingConv) const {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !is64Bit();
+ case CallingConv::X86_FastCall:
+ return !is64Bit();
+ case CallingConv::X86_ThisCall:
+ return !is64Bit();
+ case CallingConv::Fast:
+ return GuaranteedTailCallOpt;
+ case CallingConv::GHC:
+ return GuaranteedTailCallOpt;
+ }
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 646af91517fe..486dbc4e2e90 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,7 +14,9 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
#include <string>
namespace llvm {
@@ -88,10 +90,6 @@ protected:
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
- /// DarwinVers - Nonzero if this is a darwin platform: the numeric
- /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
- unsigned char DarwinVers; // Is any darwin-x86 platform.
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -99,6 +97,9 @@ protected:
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
unsigned MaxInlineSizeThreshold;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
private:
/// Is64Bit - True if the processor supports 64-bit instructions and
@@ -106,9 +107,6 @@ private:
bool Is64Bit;
public:
- enum {
- isELF, isCygwin, isDarwin, isWindows, isMingw
- } TargetType;
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -157,24 +155,31 @@ public:
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
- bool isTargetDarwin() const { return TargetType == isDarwin; }
- bool isTargetELF() const { return TargetType == isELF; }
+ bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+
+ // ELF is a reasonably sane default and the only other X86 targets we
+ // support are Darwin and Windows. Just use "not those".
+ bool isTargetELF() const {
+ return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
+ }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
- bool isTargetWindows() const { return TargetType == isWindows; }
- bool isTargetMingw() const { return TargetType == isMingw; }
- bool isTargetCygwin() const { return TargetType == isCygwin; }
+ bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+ bool isTargetMingw() const {
+ return TargetTriple.getOS() == Triple::MinGW32 ||
+ TargetTriple.getOS() == Triple::MinGW64; }
+ bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
bool isTargetCygMing() const {
- return TargetType == isMingw || TargetType == isCygwin;
+ return isTargetMingw() || isTargetCygwin();
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
- return TargetType == isMingw || TargetType == isCygwin ||
- TargetType == isWindows;
+ return isTargetMingw() || isTargetCygwin() || isTargetWindows();
}
bool isTargetWin64() const {
- return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
+ return Is64Bit && (isTargetMingw() || isTargetWindows());
}
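The predicates above now derive everything from the stored llvm::Triple rather than a cached enum. A small editorial sketch of the same idea outside the subtarget (it reuses only the Triple API and OS enum values that appear in this patch; the helper names are made up):

    #include "llvm/ADT/Triple.h"

    // Mirrors isTargetDarwin()/isTargetCygMing() for an arbitrary triple string.
    static bool triplesLikeDarwin(llvm::StringRef TT) {
      return llvm::Triple(TT).getOS() == llvm::Triple::Darwin;
    }
    static bool triplesLikeCygMing(llvm::StringRef TT) {
      llvm::Triple T(TT);
      return T.getOS() == llvm::Triple::Cygwin ||
             T.getOS() == llvm::Triple::MinGW32 ||
             T.getOS() == llvm::Triple::MinGW64;
    }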
std::string getDataLayout() const {
@@ -208,7 +213,10 @@ public:
/// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
/// 10 = Snow Leopard, etc.
- unsigned getDarwinVers() const { return DarwinVers; }
+ unsigned getDarwinVers() const {
+ if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
+ return 0;
+ }
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
@@ -237,6 +245,9 @@ public:
/// indicating the number of scheduling cycles of backscheduling that
/// should be attempted.
unsigned getSpecialAddressLatency() const;
+
+ /// IsCalleePop - Test whether a function should pop its own arguments.
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f2c50583c95f..df00d3ffcc79 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -173,14 +173,18 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
// Install an instruction selector.
PM.add(createX86ISelDag(*this, OptLevel));
- // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
- PM.add(createX87FPRegKillInserterPass());
+ // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+ if (!Subtarget.is64Bit())
+ PM.add(createGlobalBaseRegPass());
return false;
}
bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
+ // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
+ PM.add(createX87FPRegKillInserterPass());
+
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
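
[Editor's note] The X86TargetMachine.cpp hunk reorders the pass pipeline: the x87 FP_REG_KILL inserter moves from addInstSelector to addPreRegAlloc, and 32-bit targets gain a global-base-reg setup pass immediately after instruction selection. A minimal standalone sketch of the resulting ordering (plain C++, no LLVM headers; PipelineModel and the string pass names are stand-ins for the create*Pass calls shown in the diff):

// Standalone model of the pass order after this change.
#include <cstdio>
#include <string>
#include <vector>

struct PipelineModel {
  bool Is64Bit;
  std::vector<std::string> Passes;

  void addInstSelector() {
    Passes.push_back("X86ISelDag");
    if (!Is64Bit)
      Passes.push_back("GlobalBaseReg");        // 32-bit PIC base setup, new here
  }
  void addPreRegAlloc() {
    Passes.push_back("X87FPRegKillInserter");   // moved here from addInstSelector
    Passes.push_back("MaxStackAlignmentHeuristic");
  }
};

int main() {
  PipelineModel P{false, {}};                   // a 32-bit configuration
  P.addInstSelector();
  P.addPreRegAlloc();
  for (const auto &Name : P.Passes)
    std::printf("%s\n", Name.c_str());
  return 0;
}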
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index c100c590135e..6656bdc10eae 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -138,7 +138,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// FALL THROUGH
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
- case GlobalValue::LinkerPrivateLinkage:
break;
case GlobalValue::DLLImportLinkage:
llvm_unreachable("DLLImport linkage is not supported by this target!");
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index b23057226c99..abe7b2fd42be 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -245,7 +245,7 @@ SDValue XCoreTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
// If it's a debug information descriptor, don't mess with it.
if (DAG.isVerifiedDebugInfoDesc(Op))
return GA;
@@ -269,7 +269,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
DebugLoc dl = Op.getDebugLoc();
// transform to label + getid() * size
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
// If GV is an alias then use the aliasee to determine size
@@ -454,12 +454,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
if (LD->getAlignment() == 2) {
int SVOffset = LD->getSrcValueOffset();
- SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
LD->isVolatile(), LD->isNonTemporal(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
- SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
HighAddr, LD->getSrcValue(), SVOffset + 2,
MVT::i16, LD->isVolatile(),
LD->isNonTemporal(), 2);
@@ -812,6 +812,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -826,7 +827,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::Fast:
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
}
}
@@ -839,6 +840,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -866,7 +868,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -919,7 +921,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
@@ -1072,7 +1074,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(ObjSize,
LRSaveSize + VA.getLocMemOffset(),
- true, false);
+ true);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -1097,7 +1099,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// address
for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
// Create a stack slot
- int FI = MFI->CreateFixedObject(4, offset, true, false);
+ int FI = MFI->CreateFixedObject(4, offset, true);
if (i == FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
@@ -1120,7 +1122,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
- true, false));
+ true));
}
}
@@ -1133,19 +1135,19 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
bool XCoreTargetLowering::
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
- return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore);
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_XCore);
}
SDValue
XCoreTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
@@ -1175,7 +1177,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
@@ -1221,23 +1223,22 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
- .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -1250,11 +1251,12 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(XCore::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -1379,7 +1381,6 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Mul0, Mul1, Addend0, Addend1;
if (N->getValueType(0) == MVT::i32 &&
isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
- SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
DAG.getVTList(MVT::i32, MVT::i32), Mul0,
Mul1, Addend0, Addend1);
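
[Editor's note] The dominant theme in the XCoreISelLowering.cpp hunks above is the calling-convention interface change: outgoing argument values no longer ride along as Outs[i].Val but arrive in a separate OutVals vector that is threaded through LowerCall, LowerCCCCallTo, and LowerReturn in parallel with the flags. A minimal standalone sketch of that parallel-vector shape (plain C++, no LLVM headers; OutputArgFlags, Value, and lowerArgs are invented stand-ins for ISD::OutputArg, SDValue, and the lowering loop):

// Standalone model of the Outs/OutVals split used throughout this file.
#include <cstdio>
#include <vector>

struct OutputArgFlags { bool isSExt = false, isZExt = false; }; // stand-in for ISD::OutputArg
using Value = int;                                              // stand-in for SDValue

// Old shape: one struct per argument with a .Val member, read as Outs[i].Val.
// New shape: Outs[i] carries only flags; OutVals[i] carries the value at the same index.
void lowerArgs(const std::vector<OutputArgFlags> &Outs,
               const std::vector<Value> &OutVals) {
  for (size_t i = 0, e = Outs.size(); i != e; ++i) {
    Value Arg = OutVals[i];   // was: Outs[i].Val
    (void)Outs[i];            // flags are still consulted for promotion decisions
    std::printf("arg %zu = %d\n", i, Arg);
  }
}

int main() {
  std::vector<OutputArgFlags> Outs(2);
  std::vector<Value> OutVals = {7, 42};
  lowerArgs(Outs, OutVals);
  return 0;
}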
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d8d2a3aa7315..febc198f4faf 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -178,6 +179,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -186,13 +188,13 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const;
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const;
};
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 5260258d6b7e..dd90ea976770 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -299,9 +299,8 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
unsigned
XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond)const{
- // FIXME there should probably be a DebugLoc argument here
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL)const{
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -310,11 +309,11 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
if (FBB == 0) { // One way branch.
if (Cond.empty()) {
// Unconditional branch
- BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
} else {
// Conditional branch.
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
- BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
.addMBB(TBB);
}
return 1;
@@ -323,9 +322,9 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
// Two-way Conditional branch.
assert(Cond.size() == 2 && "Unexpected number of components!");
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
- BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
.addMBB(TBB);
- BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
return 2;
}
@@ -357,37 +356,31 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- if (DestRC == SrcRC) {
- if (DestRC == XCore::GRRegsRegisterClass) {
- BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
- .addReg(SrcReg)
- .addImm(0);
- return true;
- } else {
- return false;
- }
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+ bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+ if (GRDest && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
}
- if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP &&
- DestRC == XCore::GRRegsRegisterClass) {
- BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg)
- .addImm(0);
- return true;
+ if (GRDest && SrcReg == XCore::SP) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+ return;
}
- if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP &&
- SrcRC == XCore::GRRegsRegisterClass) {
+
+ if (DestReg == XCore::SP && GRSrc) {
BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
- .addReg(SrcReg);
- return true;
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- return false;
+ llvm_unreachable("Impossible reg-to-reg copy");
}
void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -438,8 +431,10 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(it->getReg());
- storeRegToStackSlot(MBB, MI, it->getReg(), true,
- it->getFrameIdx(), it->getRegClass(), &RI);
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -460,10 +455,11 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
--BeforeI;
for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
it != CSI.end(); ++it) {
-
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
loadRegFromStackSlot(MBB, MI, it->getReg(),
it->getFrameIdx(),
- it->getRegClass(), &RI);
+ RC, &RI);
assert(MI != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert multiple
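
[Editor's note] The XCoreInstrInfo.cpp hunks above replace the boolean copyRegToReg hook with copyPhysReg, which must always succeed: GPR-to-GPR copies are emitted as "add dst, src, 0", copies from SP use LDAWSP, copies to SP use SETSP, and anything else is unreachable. A minimal standalone sketch of that opcode selection (plain C++, no LLVM headers; Reg, Opcode, and selectCopyOpcode are invented for illustration):

// Standalone model of the copy lowering performed by the new copyPhysReg hook.
#include <cstdio>
#include <cstdlib>

enum class Reg { R0, R1, SP, CP };                  // illustrative subset of XCore registers
enum class Opcode { ADD_2rus, LDAWSP_ru6, SETSP_1r };

static bool isGR(Reg R) { return R == Reg::R0 || R == Reg::R1; }

Opcode selectCopyOpcode(Reg Dest, Reg Src) {
  if (isGR(Dest) && isGR(Src))      return Opcode::ADD_2rus;   // add Dest, Src, 0
  if (isGR(Dest) && Src == Reg::SP) return Opcode::LDAWSP_ru6; // ldaw Dest, sp[0]
  if (Dest == Reg::SP && isGR(Src)) return Opcode::SETSP_1r;   // set sp, Src
  std::fprintf(stderr, "Impossible reg-to-reg copy\n");        // mirrors llvm_unreachable
  std::abort();
}

int main() {
  return selectCopyOpcode(Reg::R0, Reg::SP) == Opcode::LDAWSP_ru6 ? 0 : 1;
}

The spill/restore hunks in the same file make a related move: the register class is no longer stored in CalleeSavedInfo but recovered from the register itself via TRI->getMinimalPhysRegClass(Reg).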
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 9035ea90c9bd..e5b0171579fc 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -58,17 +58,16 @@ public:
bool AllowModify) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index dd3cbc169908..19b9b1f8c00c 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -733,7 +733,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
// tsetmr, sext (reg), zext (reg)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
"sext $dst, $src2",
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 0cfb358617e8..2a88342180e4 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -82,18 +82,6 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
- XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
- XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
- XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass,
- 0
- };
- return CalleeSavedRegClasses;
-}
-
BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(XCore::CP);
@@ -320,7 +308,7 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx;
if (! isVarArg) {
// A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false);
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
} else {
FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
false);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5bdd05922598..66132ba8ff66 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -44,9 +44,6 @@ public:
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const;