path: root/lib/Target/ARM
author     Roman Divacky <rdivacky@FreeBSD.org>  2010-07-13 17:19:57 +0000
committer  Roman Divacky <rdivacky@FreeBSD.org>  2010-07-13 17:19:57 +0000
commit     66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75 (patch)
tree       9de1c5f67a98cd0e73c60838396486c984f63ac2 /lib/Target/ARM
parent     abdf259d487163e72081a8cf4991b1617206b41e (diff)
downloadsrc-66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75.tar.gz
src-66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75.zip
Update LLVM to r108243. (tag: vendor/llvm/llvm-r108243)
Notes:
svn path=/vendor/llvm/dist/; revision=210006
svn path=/vendor/llvm/llvm-r108243/; revision=210077; tag=vendor/llvm/llvm-r108243
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.h | 4
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 64
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 629
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 87
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 191
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 24
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 174
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 58
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 1
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 551
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 893
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 47
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 44
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 128
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 99
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 23
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 63
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 194
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 87
-rw-r--r--  lib/Target/ARM/ARMScheduleA8.td | 84
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 364
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 2
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 30
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 23
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 59
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 24
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 5
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 113
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 8
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 42
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 4
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 51
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 125
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 24
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 17
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2HazardRecognizer.cpp | 53
-rw-r--r--  lib/Target/ARM/Thumb2HazardRecognizer.h | 40
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 160
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 192
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 39
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 16
49 files changed, 2977 insertions, 1903 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ae7ae59c9262..14825a785649 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -90,10 +90,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
-/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
-/// operations involving sub-registers.
-bool ModelWithRegSequence();
-
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index e68354a42f8d..d316b13e0488 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -520,6 +520,70 @@ namespace ARM_AM {
// This is stored in two operands [regaddr, align]. The first is the
// address register. The second operand is the value of the alignment
// specifier to use or zero if no explicit alignment.
+ // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific
+ // instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
} // end namespace ARM_AM
} // end namespace llvm
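
A minimal usage sketch for the new modified-immediate helpers (hypothetical code, not part of the patch; it assumes <cassert> is included and the llvm::ARM_AM declarations above are in scope):

    // 64-bit element case: OpCmode == 0x1e, where each set bit of Imm8
    // expands to a full 0xff byte of the decoded element value.
    unsigned ModImm = ARM_AM::createNEONModImm(0x1e, 0xa5);
    assert(ARM_AM::getNEONModImmOpCmode(ModImm) == 0x1e);
    assert(ARM_AM::getNEONModImmVal(ModImm) == 0xa5);

    unsigned EltBits;
    uint64_t Val = ARM_AM::decodeNEONModImm(ModImm, EltBits);
    // 0xa5 == 0b10100101, so bytes 0, 2, 5, and 7 of Val become 0xff:
    // Val == 0xff00ff0000ff00ff and EltBits == 64.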
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2528854133e5..49c16f3e0720 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
- unsigned TSFlags = MI->getDesc().TSFlags;
+ uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return NULL;
@@ -199,9 +199,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
bool
ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -227,8 +227,9 @@ ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Insert the spill to the stack frame. The register is killed at the spill
//
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
storeRegToStackSlot(MBB, MI, Reg, isKill,
- CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+ CSI[i].getFrameIdx(), RC, TRI);
}
return true;
}
@@ -347,10 +348,8 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -364,17 +363,17 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
@@ -487,7 +486,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// Basic size info comes from the TSFlags field.
const TargetInstrDesc &TID = MI->getDesc();
- unsigned TSFlags = TID.TSFlags;
+ uint64_t TSFlags = TID.TSFlags;
unsigned Opc = MI->getOpcode();
switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
@@ -524,11 +523,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
- return 24;
+ return 20;
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 14;
+ return 12;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
@@ -595,6 +594,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
return true;
}
case ARM::MOVr:
+ case ARM::MOVr_TC:
case ARM::tMOVr:
case ARM::tMOVgpr2tgpr:
case ARM::tMOVtgpr2gpr:
@@ -693,75 +693,44 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-bool
-ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (DestRC == ARM::tGPRRegisterClass)
- DestRC = ARM::GPRRegisterClass;
- if (SrcRC == ARM::tGPRRegisterClass)
- SrcRC = ARM::GPRRegisterClass;
-
- // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
- if (DestRC == ARM::DPR_8RegisterClass)
- DestRC = ARM::DPR_VFP2RegisterClass;
- if (SrcRC == ARM::DPR_8RegisterClass)
- SrcRC = ARM::DPR_VFP2RegisterClass;
-
- // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
- if (DestRC == ARM::QPR_VFP2RegisterClass ||
- DestRC == ARM::QPR_8RegisterClass)
- DestRC = ARM::QPRRegisterClass;
- if (SrcRC == ARM::QPR_VFP2RegisterClass ||
- SrcRC == ARM::QPR_8RegisterClass)
- SrcRC = ARM::QPRRegisterClass;
-
- // Allow QQPR / QQPR_VFP2 cross-class copies.
- if (DestRC == ARM::QQPR_VFP2RegisterClass)
- DestRC = ARM::QQPRRegisterClass;
- if (SrcRC == ARM::QQPR_VFP2RegisterClass)
- SrcRC = ARM::QQPRRegisterClass;
-
- // Disallow copies of unequal sizes.
- if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
- return false;
-
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::SPRRegisterClass)
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg)
- .addReg(SrcReg));
- else
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
- DestReg).addReg(SrcReg)));
- } else {
- unsigned Opc;
-
- if (DestRC == ARM::SPRRegisterClass)
- Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS);
- else if (DestRC == ARM::DPRRegisterClass)
- Opc = ARM::VMOVD;
- else if (DestRC == ARM::DPR_VFP2RegisterClass ||
- SrcRC == ARM::DPR_VFP2RegisterClass)
- // Always use neon reg-reg move if source or dest is NEON-only regclass.
- Opc = ARM::VMOVDneon;
- else if (DestRC == ARM::QPRRegisterClass)
- Opc = ARM::VMOVQ;
- else if (DestRC == ARM::QQPRRegisterClass)
- Opc = ARM::VMOVQQ;
- else if (DestRC == ARM::QQQQPRRegisterClass)
- Opc = ARM::VMOVQQQQ;
- else
- return false;
-
- AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
}
- return true;
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQ;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
}
static const
@@ -795,30 +764,34 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
// FIXME: Neon instructions should support predicates
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
+ .addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
@@ -828,12 +801,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
- } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
- .addFrameIndex(FI).addImm(128);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q))
+ .addFrameIndex(FI).addImm(16);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -850,8 +825,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
}
- } else {
- assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ break;
+ case ARM::QQQQPRRegClassID: {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
@@ -865,6 +840,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -886,26 +865,30 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
- .addFrameIndex(FI).addImm(128)
+ .addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
@@ -913,14 +896,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
- } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+ AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
@@ -932,21 +917,25 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
}
- } else {
- assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -960,223 +949,6 @@ ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
return &*MIB;
}
-MachineInstr *ARMBaseInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead())
- return NULL;
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- }
- } else if (Opc == ARM::VMOVS) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI)
- .addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::VMOVQ) {
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (MFI.getObjectAlignment(FI) >= 16 &&
- getRegisterInfo().canRealignStack(MF)) {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addImm(Pred).addReg(PredReg);
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addImm(Pred).addReg(PredReg);
- }
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (MFI.getObjectAlignment(FI) >= 16 &&
- getRegisterInfo().canRealignStack(MF)) {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addImm(Pred).addReg(PredReg);
- }
- }
- }
-
- return NewMI;
-}
-
-MachineInstr*
-ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- // FIXME
- return 0;
-}
-
-bool
-ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- return MI->getOperand(4).getReg() != ARM::CPSR ||
- MI->getOperand(4).isDead();
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- return true;
- } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
- Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
- return true;
- }
-
- // FIXME: VMOVQQ and VMOVQQQQ?
-
- return false;
-}
-
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
@@ -1211,17 +983,12 @@ reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
-
+ const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
default: {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
@@ -1237,9 +1004,6 @@ reMaterialize(MachineBasicBlock &MBB,
break;
}
}
-
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
}
MachineInstr *
@@ -1291,6 +1055,165 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only difference
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled together. On some targets, if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
+ if (!NumInstrs)
+ return false;
+ if (Subtarget.getCPUString() == "generic")
+ // Generic (and overly aggressive) if-conversion limits for testing.
+ return NumInstrs <= 10;
+ else if (Subtarget.hasV7Ops())
+ return NumInstrs <= 3;
+ return NumInstrs <= 2;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF) const {
+ return NumT && NumF && NumT <= 2 && NumF <= 2;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b566271c8db9..89a2db74a75e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -116,11 +116,25 @@ namespace ARMII {
// Thumb format
ThumbFrm = 24 << FormShift,
- // NEON format
- NEONFrm = 25 << FormShift,
- NEONGetLnFrm = 26 << FormShift,
- NEONSetLnFrm = 27 << FormShift,
- NEONDupFrm = 28 << FormShift,
+ // Miscellaneous format
+ MiscFrm = 25 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 26 << FormShift,
+ NSetLnFrm = 27 << FormShift,
+ NDupFrm = 28 << FormShift,
+ NLdStFrm = 29 << FormShift,
+ N1RegModImmFrm= 30 << FormShift,
+ N2RegFrm = 31 << FormShift,
+ NVCVTFrm = 32 << FormShift,
+ NVDupLnFrm = 33 << FormShift,
+ N2RegVShLFrm = 34 << FormShift,
+ N2RegVShRFrm = 35 << FormShift,
+ N3RegFrm = 36 << FormShift,
+ N3RegVShFrm = 37 << FormShift,
+ NVExtFrm = 38 << FormShift,
+ NVMulSLFrm = 39 << FormShift,
+ NVTBLFrm = 40 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -213,7 +227,8 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -258,12 +273,10 @@ public:
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -283,29 +296,51 @@ public:
const MDNode *MDPtr,
DebugLoc DL) const;
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const;
-
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// difference between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled together. On some targets, if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
+ MachineBasicBlock &FMBB,unsigned NumF) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const {
+ return NumInstrs && NumInstrs == 1;
+ }
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 82458d2347cc..182bd9937145 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -170,56 +170,6 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
}
-const TargetRegisterClass* const *
-ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={
- &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- if (STI.isThumb1Only()) {
- return STI.isTargetDarwin()
- ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
- }
- return STI.isTargetDarwin()
- ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
-}
-
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
@@ -352,7 +302,7 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
bool
-ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
unsigned &NewSubIdx) const {
@@ -724,6 +674,15 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
I != E; ++I) {
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
case ARMII::AddrMode3:
case ARMII::AddrModeT2_i8:
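
The 255 limit for ADDri comes from ARM's data-processing immediate format: an 8-bit value rotated right by an even amount. Every value in [0, 255] encodes directly (rotation 0), while larger offsets encode only when they happen to fit a rotation, so 255 is the largest offset that is always representable. A simplified sketch of that predicate (the in-tree check is ARM_AM::getSOImmVal, which returns -1 for unencodable values):

    // Sketch: true if V fits ARM's rotated 8-bit immediate format.
    static bool isSOImmEncodable(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotate V left by Rot; if the result fits in 8 bits, then
        // rotating that 8-bit value right by Rot reproduces V.
        uint32_t R = (V << Rot) | (Rot ? (V >> (32 - Rot)) : 0);
        if (R <= 0xff)
          return true;
      }
      return false;
    }
    // isSOImmEncodable(255) and isSOImmEncodable(256) both hold,
    // but isSOImmEncodable(257) does not.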
@@ -765,6 +724,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS1GPRs;
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register.
@@ -780,7 +740,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
- const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -798,50 +757,50 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
- if (CSRegClasses[i] == ARM::GPRRegisterClass ||
- CSRegClasses[i] == ARM::tGPRRegisterClass) {
- if (Spilled) {
- NumGPRSpills++;
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
- if (!STI.isTargetDarwin()) {
- if (Reg == ARM::LR)
- LRSpilled = true;
- CS1Spilled = true;
- continue;
- }
+ if (Spilled) {
+ NumGPRSpills++;
- // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
- switch (Reg) {
- case ARM::LR:
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
LRSpilled = true;
- // Fallthrough
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- CS1Spilled = true;
- break;
- default:
- break;
- }
- } else {
- if (!STI.isTargetDarwin()) {
- UnspilledCS1GPRs.push_back(Reg);
- continue;
- }
+ CS1Spilled = true;
+ continue;
+ }
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- UnspilledCS1GPRs.push_back(Reg);
- break;
- default:
- UnspilledCS2GPRs.push_back(Reg);
- break;
- }
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
}
}
}
@@ -862,9 +821,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// offset, make sure a register (or a spill slot) is available for the
// register scavenger. Note that if we're indexing off the frame pointer, the
// effective stack size is 4 bytes larger since the FP points to the stack
- // slot of the previous FP.
- bool BigStack = RS &&
- estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF);
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
+ estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -957,7 +923,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = ARM::GPRRegisterClass;
- MachineFrameInfo *MFI = MF.getFrameInfo();
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
@@ -1622,6 +1587,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = prior(MBB.end());
assert(MBBI->getDesc().isReturn() &&
"Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1696,6 +1662,39 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size());
}
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else if (RetOpcode == ARM::TCRETURNdiND) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ }
+
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 2c9c82d0318b..f7ee0d5cc66d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -69,9 +69,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
/// getMatchingSuperRegClass - Return a subclass of the specified register
@@ -81,14 +78,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
- /// canCombinedSubRegIndex - Given a register class and a list of sub-register
- /// indices, return true if it's possible to combine the sub-register indices
- /// into one that corresponds to a larger sub-register. Return the new sub-
- /// register index by reference. Note the new index by be zero if the given
- /// sub-registers combined to form the whole register.
- virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const;
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
@@ -150,8 +148,8 @@ public:
virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, FrameIndexValue *Value = NULL,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f2730fc8a5cb..7895cb071922 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -55,6 +55,7 @@ namespace {
const std::vector<MachineConstantPoolEntry> *MCPEs;
const std::vector<MachineJumpTableEntry> *MJTEs;
bool IsPIC;
+ bool IsThumb;
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfo>();
@@ -67,8 +68,8 @@ namespace {
: MachineFunctionPass(&ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
- MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
@@ -139,6 +140,12 @@ namespace {
void emitMiscInstruction(const MachineInstr &MI);
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
@@ -147,7 +154,8 @@ namespace {
}
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
- /// machine operand requires relocation, record the relocation and return zero.
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
unsigned Reloc);
unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
@@ -193,6 +201,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MJTEs = 0;
if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
JTI->Initialize(MF, IsPIC);
MMI = &getAnalysis<MachineModuleInfo>();
MCE.setModuleInfo(MMI);
@@ -347,7 +356,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
MCE.processDebugLoc(MI.getDebugLoc(), true);
- NumEmitted++; // Keep track of the # of mi's emitted
+ ++NumEmitted; // Keep track of the # of mi's emitted
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
llvm_unreachable("Unhandled instruction encoding format!");
@@ -407,6 +416,23 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::VFPMiscFrm:
emitMiscInstruction(MI);
break;
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
}
MCE.processDebugLoc(MI.getDebugLoc(), false);
}
@@ -1539,4 +1565,144 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = ARMRegisterInfo::getRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = ARMRegisterInfo::getRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = ARMRegisterInfo::getRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
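// Illustrative note (a sketch, not part of the patch): with the mapping
// above, an ARM-mode NEON data-processing word 0xf2xxxxxx (U bit clear)
// becomes 0xefxxxxxx in Thumb2 mode, and 0xf3xxxxxx (U bit set) becomes
// 0xffxxxxxx; the low 24 bits carry over unchanged. For example:
//   convertNEONDataProcToThumb(0xf2000000) == 0xef000000
//   convertNEONDataProcToThumb(0xf3000000) == 0xff000000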
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const TargetInstrDesc &TID = MI.getDesc();
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & ~1) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
+
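A worked example of the lane-field packing above, using arbitrary values: with 16-bit elements (LaneShift 1), lane 3 scales to index 6, which splits into opc1 = 1 and opc2 = 2.

#include <cstdio>

// Mirrors the arithmetic in emitNEONLaneInstruction: the scaled lane index
// is split across the opc1 (bits 22:21) and opc2 (bits 6:5) fields.
int main() {
  unsigned LaneShift = 1;          // 16-bit elements
  unsigned Lane = 3u << LaneShift; // lane 3 -> scaled index 6
  unsigned Opc1 = Lane >> 2;       // 1
  unsigned Opc2 = Lane & 3;        // 2
  printf("opc1=%u opc2=%u\n", Opc1, Opc2);
  return 0;
}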
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
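A small sketch of the same immediate-field split, with a hypothetical packed operand value:

#include <cstdio>

// The single operand packs op (bit 12), cmode (bits 11:8), and the 8-bit
// encoded value as i:imm3:imm4, matching the shifts and masks used above.
int main() {
  unsigned Imm = 0x51C; // hypothetical packed value
  printf("op=%u cmode=%u i=%u imm3=%u imm4=%#x\n",
         (Imm >> 12) & 1, (Imm >> 8) & 0xf, (Imm >> 7) & 1,
         (Imm >> 4) & 0x7, Imm & 0xf); // op=0 cmode=5 i=0 imm3=1 imm4=0xc
  return 0;
}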
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
#include "ARMGenCodeEmitter.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 13d8b74014c5..65a3da6f1617 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -337,7 +337,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
-
+
// Clear NewWaterList now. If we split a block for branches, it should
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
@@ -361,8 +361,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// After a while, this might be made debug-only, but it is not expensive.
verify(MF);
- // If LR has been forced spilled and no far jumps (i.e. BL) has been issued.
- // Undo the spill / restore of LR if possible.
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
@@ -407,7 +407,7 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
- NumCPEs++;
+ ++NumCPEs;
DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
<< "\n");
}
@@ -418,7 +418,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
- if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
return false;
MachineBasicBlock *NextBB = llvm::next(MBBI);
@@ -491,6 +492,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
// Add instruction size to MBBSize.
MBBSize += TII->GetInstSizeInBytes(I);
@@ -722,7 +725,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// correspond to anything in the source.
unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
- NumSplit++;
+ ++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
while (!OrigBB->succ_empty()) {
@@ -945,7 +948,7 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
if (--CPE->RefCount == 0) {
RemoveDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
- NumCPEs--;
+ --NumCPEs;
return true;
}
return false;
@@ -1246,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
- NumCPEs++;
+ ++NumCPEs;
BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
// Compensate for .align 2 in thumb mode.
@@ -1369,7 +1372,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
BBSizes[MBB->getNumber()] += 2;
AdjustBBOffsetsAfter(MBB, 2);
HasFarJump = true;
- NumUBrFixed++;
+ ++NumUBrFixed;
DEBUG(errs() << " Changed B to long jump " << *MI);
@@ -1402,7 +1405,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
- NumCBrFixed++;
+ ++NumCBrFixed;
if (BMI != MI) {
if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
@@ -1621,7 +1624,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
// constantpool tables?
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
if (MJTI == 0) return false;
-
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1658,15 +1661,25 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
continue;
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
MachineBasicBlock::iterator PrevI = MI;
- if (PrevI == MBB->begin())
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
continue;
- MachineInstr *AddrMI = --PrevI;
+ MachineInstr *AddrMI = PrevI;
bool OptOk = true;
- // Examine the instruction that calculate the jumptable entry address.
- // If it's not the one just before the t2BR_JT, we won't delete it, then
- // it's not worth doing the optimization.
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
const MachineOperand &MO = AddrMI->getOperand(k);
if (!MO.isReg() || !MO.getReg())
@@ -1683,9 +1696,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
- // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
// to delete it as well.
- MachineInstr *LeaMI = --PrevI;
+ MachineInstr *LeaMI = PrevI;
if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
LeaMI->getOperand(0).getReg() != BaseReg)
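The backward scan added above is a common pattern; a minimal standalone sketch, where a plain vector stands in for the instruction stream and a tag value for the "defines BaseReg" test (both hypothetical):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Block; // 3 stands in for the def of BaseReg
  Block.push_back(7); Block.push_back(3); Block.push_back(9); Block.push_back(5);

  std::vector<int>::iterator I = Block.end() - 1, B = Block.begin();
  while (I != B && *I != 3)
    --I;
  // Re-test after the loop: it also stops at begin() without a match.
  if (*I != 3)
    printf("no def found; skip\n");
  else
    printf("def at index %d\n", (int)(I - Block.begin()));
  return 0;
}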
@@ -1729,7 +1747,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
if (MJTI == 0) return false;
-
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1769,7 +1787,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
MachineFunction &MF = *BB->getParent();
- // If it's the destination block is terminated by an unconditional branch,
+ // If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6f4eddf7361d..3119b54563de 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include <cstddef>
namespace llvm {
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c87f5d7c16a5..9c62597b4323 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -144,13 +144,15 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ .addReg(EvenDst,
+ getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ .addReg(OddDst,
+ getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
Modified = true;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9baef6bf1243..c84d3ff81324 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMTargetMachine.h"
@@ -35,11 +36,6 @@
using namespace llvm;
-static cl::opt<bool>
-UseRegSeq("neon-reg-sequence", cl::Hidden,
- cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
- cl::init(true));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -147,6 +143,11 @@ private:
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1);
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -173,24 +174,17 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// PairDRegs - Form a quad register from a pair of D registers.
- ///
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
-
- /// PairDRegs - Form a quad register pair from a pair of Q registers.
- ///
SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
- /// QuadDRegs - Form a quad register pair from a quad of D registers.
- ///
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
-
- /// QuadQRegs - Form 4 consecutive Q registers.
- ///
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- /// OctoDRegs - Form 8 consecutive D registers.
- ///
+ // Form sequences of 8 consecutive D registers.
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
@@ -544,10 +538,9 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = Op->getDebugLoc();
if (N.getOpcode() != ISD::ADD) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || NC->getZExtValue() != 0)
+ if (!NC || !NC->isNullValue())
return false;
Base = Offset = N;
@@ -788,8 +781,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
if (N.getOpcode() == ISD::ADD) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
+ // 8 bits.
if (((RHSC & 0x3) == 0) &&
- ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits.
+ ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -798,7 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
} else if (N.getOpcode() == ISD::SUB) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
- if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits.
+ // 8 bits.
+ if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
return true;
@@ -960,22 +955,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
/// PairDRegs - Form a quad register from a pair of D registers.
///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- if (llvm::ModelWithRegSequence()) {
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
- }
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0);
- SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VT, Undef, V0, SubReg0);
- return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VT, SDValue(Pair, 0), V1, SubReg1);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
@@ -988,6 +985,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
/// QuadDRegs - Form 4 consecutive D registers.
///
SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
@@ -1088,7 +1098,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+ if (NumVecs < 2)
return VLd;
SDValue RegSeq;
@@ -1129,24 +1139,17 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
- if (llvm::ModelWithRegSequence()) {
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
+ if (NumVecs == 1) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
} else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+ SDValue(VLd, 0), SDValue(VLd, 1),
+ SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+ SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+ SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+ ReplaceUses(SDValue(N, 0), Q0);
+ ReplaceUses(SDValue(N, 1), Q1);
}
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1169,37 +1172,27 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
- if (llvm::ModelWithRegSequence()) {
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
- 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
- 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
- } else {
- // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ SDValue V0 = SDValue(VLdA, 0);
+ SDValue V1 = SDValue(VLdB, 0);
+ SDValue V2 = SDValue(VLdA, 1);
+ SDValue V3 = SDValue(VLdB, 1);
+ SDValue V4 = SDValue(VLdA, 2);
+ SDValue V5 = SDValue(VLdB, 2);
+ SDValue V6 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdA, 3);
+ SDValue V7 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdB, 3);
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+ V4, V5, V6, V7), 0);
+
+ // Extract out the 3 / 4 Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), Q);
}
}
ReplaceUses(SDValue(N, NumVecs), Chain);
@@ -1209,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1247,7 +1240,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Align);
if (is64BitVector) {
- if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+ if (NumVecs >= 2) {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1292,7 +1285,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Quad registers are directly supported for VST1 and VST2,
// storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+ if (NumVecs == 2) {
// First extract the pair of Q registers.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
@@ -1330,76 +1323,48 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
- if (llvm::ModelWithRegSequence()) {
- // Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
-
- // Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
- // Store the odd D registers.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
- } else {
- Ops.push_back(Reg0); // post-access address offset
-
- // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
-
- // Store the odd subregs.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
- }
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3));
+ V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
+
+ // Store the even D registers.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ Ops.push_back(Reg0); // post-access address offset
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+ RegVT, RegSeq));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+ RegVT, RegSeq);
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1421,13 +1386,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
// Quad registers are handled by load/store of subregs. Find the subreg info.
unsigned NumElts = 0;
- int SubregIdx = 0;
bool Even = false;
EVT RegVT = VT;
if (!is64BitVector) {
RegVT = GetNEONSubregVT(VT);
NumElts = RegVT.getVectorNumElements();
- SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1;
Even = Lane < NumElts;
}
@@ -1455,35 +1418,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = 0;
if (is64BitVector) {
Opc = DOpcodes[OpcodeIndex];
- if (llvm::ModelWithRegSequence()) {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
} else {
// Check if this is loading the even or odd subreg of a Q register.
if (Lane < NumElts) {
@@ -1493,31 +1447,24 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
Opc = QOpcodes1[OpcodeIndex];
}
- if (llvm::ModelWithRegSequence()) {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- }
-
- // Extract the subregs of the input vector.
- unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
- RegSeq));
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Extract the subregs of the input vector.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
- N->getOperand(Vec+3)));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+
+ // Extract the subregs of the input vector.
+ unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
+ RegSeq));
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
@@ -1531,76 +1478,97 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ResTys.push_back(MVT::Other);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
- if (llvm::ModelWithRegSequence()) {
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- if (is64BitVector) {
- SDValue V0 = SDValue(VLdLn, 0);
- SDValue V1 = SDValue(VLdLn, 1);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = SDValue(VLdLn, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLdLn, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ if (is64BitVector) {
+ SDValue V0 = SDValue(VLdLn, 0);
+ SDValue V1 = SDValue(VLdLn, 1);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- if (Even) {
- V[i] = SDValue(VLdLn, Vec);
- V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- } else {
- V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- V[i+1] = SDValue(VLdLn, Vec);
- }
+ SDValue V2 = SDValue(VLdLn, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLdLn, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // For 128-bit vectors, take the 64-bit results of the load and insert
+ // them as subregs into the result.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ if (Even) {
+ V[i] = SDValue(VLdLn, Vec);
+ V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ } else {
+ V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ V[i+1] = SDValue(VLdLn, Vec);
}
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- if (NumVecs == 2)
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
- else
- RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
}
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
- return NULL;
- }
-
- // For a 64-bit vector load to D registers, nothing more needs to be done.
- if (is64BitVector)
- return VLdLn;
-
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
- N->getOperand(Vec+3),
- SDValue(VLdLn, Vec));
- ReplaceUses(SDValue(N, Vec), QuadVec);
+ if (NumVecs == 2)
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
+ else
+ RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
}
- Chain = SDValue(VLdLn, NumVecs);
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
+
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
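For context, the usual source-level route into this selector is the NEON table-lookup intrinsic (compile for a NEON-capable ARM target; the function name is illustrative):

#include <arm_neon.h>

// vtbl2_u8 lowers to llvm.arm.neon.vtbl2, which SelectVTBL above turns into
// ARM::VTBL2 with the two table registers made consecutive via REG_SEQUENCE.
uint8x8_t shuffle_bytes(uint8x8x2_t Table, uint8x8_t Index) {
  return vtbl2_u8(Table, Index);
}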
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
bool isSigned) {
if (!Subtarget->hasV6T2Ops())
@@ -1954,8 +1922,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
- SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5);
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
@@ -2015,7 +1983,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
@@ -2029,7 +1997,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
@@ -2211,6 +2179,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT.getSimpleVT() == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT.getSimpleVT() == MVT::f32 &&
+ "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
@@ -2342,6 +2326,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4);
+ }
+ break;
+ }
+
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
}
@@ -2367,9 +2374,3 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
-
-/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
-/// operations involving sub-registers.
-bool llvm::ModelWithRegSequence() {
- return UseRegSeq;
-}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b8126a3c5d18..98d8b8585428 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
@@ -40,6 +41,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -47,9 +49,27 @@
#include <sstream>
using namespace llvm;
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(true));
+
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
- cl::desc("Generate calls via indirect call instructions."),
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableARMCodePlacement("arm-code-placement", cl::Hidden,
+ cl::desc("Enable code placement pass for ARM"),
cl::init(false));
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
@@ -94,10 +114,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- if (llvm::ModelWithRegSequence())
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- else
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
@@ -393,13 +410,57 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
-
- // If the subtarget does not have extract instructions, sign_extend_inreg
- // needs to be expanded. Extract is available in ARM mode on v6 and up,
- // and on most Thumb2 implementations.
- if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
- || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
+ // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
+ // use the default expansion.
+ bool canHandleAtomics =
+ (Subtarget->hasV7Ops() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
+ if (canHandleAtomics) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now)
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+
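What an Expand entry amounts to on a target without exclusive load/store: the operation becomes a runtime libcall. A rough sketch assuming the conventional __sync_* naming (the symbol is normally provided by compiler-rt or libgcc; the body here is renamed and non-atomic, purely for illustration):

#include <cstdio>

extern "C" int my_sync_fetch_and_add_4(volatile int *Ptr, int Val) {
  int Old = *Ptr; // placeholder body; a real implementation must be atomic
  *Ptr = Old + Val;
  return Old;
}

int main() {
  volatile int Counter = 41;
  printf("%d\n", my_sync_fetch_and_add_4(&Counter, 1)); // 41 (old value)
  printf("%d\n", (int)Counter);                         // 42
  return 0;
}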
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
@@ -412,8 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ }
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -474,28 +537,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
else
setSchedulingPreference(Sched::Hybrid);
- // FIXME: If-converter should use instruction latency to determine
- // profitability rather than relying on fixed limits.
- if (Subtarget->getCPUString() == "generic") {
- // Generic (and overly aggressive) if-conversion limits.
- setIfCvtBlockSizeLimit(10);
- setIfCvtDupBlockSizeLimit(2);
- } else if (Subtarget->hasV7Ops()) {
- setIfCvtBlockSizeLimit(3);
- setIfCvtDupBlockSizeLimit(1);
- } else if (Subtarget->hasV6Ops()) {
- setIfCvtBlockSizeLimit(2);
- setIfCvtDupBlockSizeLimit(1);
- } else {
- setIfCvtBlockSizeLimit(3);
- setIfCvtDupBlockSizeLimit(2);
- }
-
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
- // Do not enable CodePlacementOpt for now: it currently runs after the
- // ARMConstantIslandPass and messes up branch relaxation and placement
- // of constant islands.
- // benefitFromCodePlacementOpt = true;
+
+ // On ARM, arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4-byte aligned.
+ setMinStackArgumentAlignment(4);
+
+ if (EnableARMCodePlacement)
+ benefitFromCodePlacementOpt = true;
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -537,6 +586,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
@@ -581,6 +632,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
}
@@ -603,15 +655,33 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
- return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
+ return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
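Since the value returned above is a log2 alignment, the change gives Thumb functions 2-byte and ARM functions 4-byte alignment:

#include <cstdio>

int main() {
  unsigned ThumbLog2 = 1, ARMLog2 = 2;
  printf("Thumb: %u-byte aligned\n", 1u << ThumbLog2); // 2
  printf("ARM:   %u-byte aligned\n", 1u << ARMLog2);   // 4
  return 0;
}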
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT.isFloatingPoint() || VT.isVector())
return Sched::Latency;
}
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Loads are scheduled for latency even if the instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.mayLoad())
+ return Sched::Latency;
+
+ const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
+ if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+ return Sched::Latency;
return Sched::RegPressure;
}
@@ -964,11 +1034,28 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- // ARM target does not yet support tail call optimization.
- isTailCall = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Temporarily disable tail calls so things don't break.
+ if (!EnableARMTailCalls)
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -981,9 +1068,14 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -996,7 +1088,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[realArgIdx].Val;
+ SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
// Promote the value if needed.
@@ -1044,7 +1136,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
+ } else if (!IsSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1059,10 +1151,32 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ // Tail call byval lowering might overwrite argument registers, so in the
+ // case of tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls, lower the arguments to the 'real' stack slots.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1071,7 +1185,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
- MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
@@ -1117,7 +1230,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
@@ -1134,7 +1247,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
- Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
bool isStub = Subtarget->isTargetDarwin() &&
@@ -1171,11 +1284,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
: ARMISD::CALL_NOLINK;
}
- if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
- // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
- Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
- InFlag = Chain.getValue(1);
- }
std::vector<SDValue> Ops;
Ops.push_back(Chain);
@@ -1189,9 +1297,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
- &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -1205,10 +1317,203 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
dl, DAG, InVals);
}
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available at the same relative position in the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
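To make the final check concrete: once a frame index has been found, MatchingStackOffset reduces to comparing the call-site offset and size against the caller's fixed object. A standalone model of that comparison (FixedObject is a hypothetical stand-in for MachineFrameInfo's fixed-object queries, not part of the patch):

#include <cstdint>

struct FixedObject { int64_t Offset; uint64_t SizeInBytes; };

// Mirrors the tail of MatchingStackOffset: the forwarded argument must occupy
// exactly the caller's incoming slot, both in offset and in size.
static bool matchesIncomingSlot(uint64_t ArgSizeInBits, int64_t CallOffset,
                                const FixedObject &Slot) {
  uint64_t Bytes = ArgSizeInBits / 8;
  return CallOffset == Slot.Offset && Bytes == Slot.SizeInBytes;
}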
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+  // Do not sibcall optimize vararg calls unless the call site passes no
+  // arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them.
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+  // LR. This means if we need to reload LR, it takes an extra instruction,
+  // whose cost outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // For the moment, we can only do this to functions defined in this
+ // compilation, or to indirect calls. A Thumb B to an ARM function,
+ // or vice versa, is not easily fixed up in the linker unlike BL.
+ // (We could do this by loading the address of the callee into a register;
+ // that is an extra instruction over the direct call and burns a register
+ // as well, so is not likely to be a win.)
+
+ // It might be safe to remove this restriction on non-Darwin.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (isa<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ if (GV->isDeclaration() || GV->isWeakForLinker())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, getTargetMachine(),
+ RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, getTargetMachine(),
+ RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
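Condensed into a single predicate, the screening above (before the per-argument layout checks) is roughly the following; the booleans are assumptions standing in for values the function computes, and true means the sibcall is rejected:

// A minimal sketch, not the implementation.
static bool obviouslyUnsafeSibcall(bool isVarArg, bool hasOutgoingArgs,
                                   bool calleeStructRet, bool callerStructRet,
                                   bool isThumb1, bool calleeIsExternalSym,
                                   bool calleeIsDeclOrWeak) {
  if (isVarArg && hasOutgoingArgs) return true;  // vararg call with arguments
  if (calleeStructRet || callerStructRet) return true; // sret on either side
  if (isThumb1) return true;               // epilogue support is not ready
  if (calleeIsExternalSym) return true;    // cross-mode B not fixable by linker
  if (calleeIsDeclOrWeak) return true;     // may resolve outside this module
  return false;
}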
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
@@ -1239,7 +1544,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Arg = Outs[realRVLocIdx].Val;
+ SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -1477,7 +1782,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// pair. This is always cheaper.
if (Subtarget->useMovt()) {
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, PtrVT));
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1552,9 +1857,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- SDValue Val = Subtarget->isThumb() ?
- DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
- DAG.getConstant(0, MVT::i32);
+ SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
Op.getOperand(1), Val);
}
@@ -1568,8 +1871,7 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget)
- const {
+ const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
@@ -1597,7 +1899,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0,
false, false, 0);
- SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1609,25 +1910,21 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+ const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
- SDValue Res;
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- if (isDeviceBarrier) {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- } else {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
- return Res;
+  // v6 and v7 can both handle barriers directly, but need to be handled a bit
+ // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+ // never get here.
+ unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
+ if (Subtarget->hasV7Ops())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
+ else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return SDValue();
}
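For reference, the barriers the two paths select correspond to the following user-level sketch in GCC/Clang inline assembly (an illustration, not part of the patch):

static inline void data_memory_barrier(void) {
#if defined(__ARM_ARCH_7A__)
  __asm__ __volatile__("dmb" ::: "memory");  // v7: dedicated DMB instruction
#elif defined(__ARM_ARCH_6__)
  // v6: CP15 form, which takes a register operand; that register is the
  // extra constant-0 node passed above.
  __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" :: "r"(0) : "memory");
#else
#error "Thumb1 / pre-v6: memory barriers are lowered to a libcall instead"
#endif
}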
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1712,7 +2009,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1768,8 +2065,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
- int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0,
@@ -1836,8 +2132,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1868,7 +2163,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
AFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(VARegSaveSize,
ArgOffset + VARegSaveSize - VARegSize,
- true, false));
+ true));
SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
getPointerTy());
@@ -1884,8 +2179,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0,
- false, false, 0);
+ PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
+ 0, false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -1895,8 +2190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
- true, false));
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
}
return Chain;
@@ -1978,9 +2272,44 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
}
+static bool canBitcastToInt(SDNode *Op) {
+ return Op->hasOneUse() &&
+ ISD::isNormalLoad(Op) &&
+ Op->getValueType(0) == MVT::f32;
+}
+
+static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(),
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
-static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (UnsafeFPMath && FiniteOnlyFPMath() &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE) &&
+ canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
+    // If unsafe fp math optimization is enabled and there are no other uses of
+    // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+ LHS = bitcastToInt(LHS, DAG);
+ RHS = bitcastToInt(RHS, DAG);
+ return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+ }
+
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
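Outside the DAG, the transformation is simple to state: with NaNs assumed away and both f32 operands reloaded from memory as i32, an equality test on the bit patterns replaces VCMP+FMSTAT. A standalone model (hypothetical helper; note it treats +0.0 and -0.0 as unequal, which is precisely what the unsafe-math setting licenses):

#include <cstdint>
#include <cstring>

static bool float_eq_as_int(const float *a, const float *b) {
  uint32_t ia, ib;
  std::memcpy(&ia, a, sizeof ia);  // the i32 reload that bitcastToInt performs
  std::memcpy(&ib, b, sizeof ib);
  return ia == ib;                 // integer CMP instead of VCMP+FMSTAT
}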
@@ -2010,13 +2339,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMCC, CCR, Cmp);
+ ARMCC, CCR, Cmp);
if (CondCode2 != ARMCC::AL) {
SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
Result, TrueVal, ARMCC2, CCR, Cmp2);
}
@@ -2043,8 +2372,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
@@ -2132,7 +2461,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
-static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
@@ -2140,8 +2469,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
+ SDValue Cmp = getVFPCmp(Tmp1, FP0,
+ ISD::SETLT, ARMCC, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
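A scalar analogue of this lowering, for illustration only: take the absolute value, then negate it when the sign source compares LT against zero. Like the code above, the model relies on an ordinary compare and so does not treat -0.0 as negative:

#include <cmath>

// fabs plus a conditional negate, mirroring FABS followed by CNEG on ARMCC::LT.
static double copysign_via_cneg(double mag, double sgn) {
  double a = std::fabs(mag);
  return (sgn < 0.0) ? -a : a;
}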
@@ -2206,7 +2537,8 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
// Turn f64->i64 into VMOVRRD.
@@ -2516,76 +2848,149 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
-/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
-/// VMOV instruction, and if so, return the constant being splatted.
-static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
- unsigned SplatBitSize, SelectionDAG &DAG) {
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return either the constant being
+/// splatted or the encoded value, depending on the DoEncode parameter.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ bool isVMOV, bool DoEncode) {
+ unsigned OpCmode, Imm;
+ EVT VT;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+  // immediate instructions other than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
switch (SplatBitSize) {
case 8:
- // Any 1-byte value is OK.
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
- return DAG.getTargetConstant(SplatBits, MVT::i8);
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = MVT::i8;
+ break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i16);
- break;
+ VT = MVT::i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
case 32:
// NEON's 32-bit VMOV supports splat values where:
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0 ||
- (SplatBits & ~0xff0000) == 0 ||
- (SplatBits & ~0xff000000) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i32);
+ VT = MVT::i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
if ((SplatBits & ~0xffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xff) == 0xff)
- return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
if ((SplatBits & ~0xffffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
- return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
// Note: there are a few 32-bit splat values (specifically: 00ffff00,
// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
// VMOV.I32. A (very) minor optimization would be to replicate the value
// and fall through here to test for a valid 64-bit splat. But, then the
// caller would also need to check and handle the change in size.
- break;
+ return SDValue();
case 64: {
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ if (!isVMOV)
+ return SDValue();
uint64_t BitMask = 0xff;
uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if (((SplatBits | SplatUndef) & BitMask) == BitMask)
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
- else if ((SplatBits & BitMask) != 0)
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
return SDValue();
+ }
BitMask <<= 8;
+ ImmMask <<= 1;
}
- return DAG.getTargetConstant(Val, MVT::i64);
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = MVT::i64;
+ break;
}
default:
- llvm_unreachable("unexpected size for isVMOVSplat");
- break;
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
}
- return SDValue();
+ if (DoEncode) {
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+ }
+ return DAG.getTargetConstant(SplatBits, VT);
}
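The 32-bit one-nonzero-byte cases follow a regular pattern: byte N maps to Cmode 2*N with that byte as the 8-bit immediate. A hypothetical re-derivation; packNEONModImm assumes the op:cmode:imm8 packing of ARM_AM::createNEONModImm and should be read as an assumption, not its definition:

#include <cstdint>

// Assumed packing: op/cmode in bits 12-8, imm8 in bits 7-0.
static unsigned packNEONModImm(unsigned OpCmode, unsigned Imm8) {
  return (OpCmode << 8) | Imm8;
}

// Encode a 32-bit splat with exactly one nonzero byte; false means fall back
// to the 0xff-filled or 64-bit cases handled above.
static bool encodeSplat32(uint32_t SplatBits, unsigned &Enc) {
  for (unsigned Byte = 0; Byte != 4; ++Byte) {
    uint32_t Mask = 0xffu << (8 * Byte);
    if ((SplatBits & ~Mask) == 0) {  // Cmode = 000x, 001x, 010x or 011x
      Enc = packNEONModImm(Byte * 2, SplatBits >> (8 * Byte));
      return true;
    }
  }
  return false;
}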
-/// getVMOVImm - If this is a build_vector of constants which can be
-/// formed by using a VMOV instruction of the specified element size,
-/// return the constant being splatted. The ByteSize field indicates the
-/// number of bytes of each element [1248].
-SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+/// getNEONModImm - If this is a valid vector constant for a NEON instruction
+/// with a "modified immediate" operand (e.g., VMOV) of the specified element
+/// size, return the encoded value for that immediate. The ByteSize field
+/// indicates the number of bytes of each element [1248].
+SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG) {
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
@@ -2597,8 +3002,8 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
if (SplatBitSize > ByteSize * 8)
return SDValue();
- return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG);
+ return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, isVMOV, true);
}
static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@@ -2838,8 +3243,10 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
- SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize, DAG);
+ // Check if an immediate VMOV works.
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, true, false);
if (Val.getNode())
return BuildSplat(Val, VT, DAG, dl);
}
@@ -2883,21 +3290,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
// Vectors with 32- or 64-bit elements can be built by directly assigning
- // the subregisters.
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
- SDValue Val = DAG.getUNDEF(VecVT);
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Elt = Op.getOperand(i);
- if (Elt.getOpcode() == ISD::UNDEF)
- continue;
- Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
- DAG.getConstant(i, MVT::i32));
- }
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
@@ -2934,7 +3337,9 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool ReverseVEXT;
unsigned Imm, WhichResult;
- return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
@@ -3032,59 +3437,62 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of the same type so that they get CSEd properly.
SVN->getMask(ShuffleMask);
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1) Lane = 0;
-
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
}
- return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i32));
- }
- bool ReverseVEXT;
- unsigned Imm;
- if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
- if (ReverseVEXT)
- std::swap(V1, V2);
- return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
- DAG.getConstant(Imm, MVT::i32));
- }
-
- if (isVREVMask(ShuffleMask, VT, 64))
- return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 32))
- return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 16))
- return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
-
- // Check for Neon shuffles that modify both input vectors in place.
- // If both results are used, i.e., if there are two shuffles with the same
- // source operands and with masks corresponding to both results of one of
- // these operations, DAG memoization will ensure that a single node is
- // used for both shuffles.
- unsigned WhichResult;
- if (isVTRNMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVUZPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVZIPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
- if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
@@ -3108,8 +3516,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
- // Implement shuffles with 32- or 64-bit elements as subreg copies.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
@@ -3117,17 +3524,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
- SDValue Val = DAG.getUNDEF(VecVT);
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
- continue;
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- ShuffleMask[i] < (int)NumElts ? V1 : V2,
- DAG.getConstant(ShuffleMask[i] & (NumElts-1),
- MVT::i32));
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
- Elt, DAG.getConstant(i, MVT::i32));
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
}
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
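The mask-to-operand mapping used above can be checked with a tiny scalar model: indices 0..NumElts-1 select from V1, indices NumElts..2*NumElts-1 from V2 (for the power-of-two element counts involved, the & (NumElts-1) above is the same subtraction), and -1 becomes undef:

#include <cstdio>

int main() {
  const int NumElts = 2;
  const float V1[] = {1.0f, 2.0f}, V2[] = {3.0f, 4.0f};
  const int ShuffleMask[] = {1, 3};  // expands to BUILD_VECTOR(V1[1], V2[1])
  for (int i = 0; i < NumElts; ++i) {
    const int M = ShuffleMask[i];
    if (M < 0) { std::puts("undef"); continue; }
    std::printf("%g\n", M < NumElts ? V1[M] : V2[M - NumElts]);
  }
  return 0;
}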
@@ -3277,7 +3684,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
@@ -3315,7 +3727,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ...
BB = exitMBB;
- MF->DeleteMachineInstr(MI); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
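The block-splitting idiom that replaces transferSuccessors here recurs in the next two hunks, so it is worth isolating. A sketch against the MachineBasicBlock API of this era, reusing the names from the code above (MF, It, BB, MI, loopMBB), not a self-contained program:

// Split BB at MI: everything after MI, plus BB's successor edges (updating
// any PHIs that referenced BB), moves into exitMBB; the new control flow is
// then wired up explicitly.
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
                llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(loopMBB);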
@@ -3358,7 +3770,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
@@ -3403,7 +3820,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// ...
BB = exitMBB;
- MF->DeleteMachineInstr(MI); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
@@ -3488,22 +3905,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
- .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -3516,11 +3932,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
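As a scalar analogue of the diamond being built (illustration only): thisMBB branches around copy0MBB when the condition already yields the true value, and the PHI in sinkMBB merges the two incoming paths:

static int select_cc(bool cond, int tval, int fval) {
  if (cond)        // tBcc sinkMBB: skip the copy block
    return tval;   // thisMBB's edge into the PHI
  return fval;     // copy0MBB's fallthrough edge into the PHI
}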
@@ -3541,7 +3958,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+ BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
@@ -3573,7 +3990,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
NeedPred = true; NeedCC = true; NeedOp3 = true;
break;
}
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
if (OpOpc == ARM::tAND)
AddDefaultT1CC(MIB);
MIB.addReg(ARM::SP);
@@ -3589,10 +4006,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(BB, dl, TII->get(CopyOpc))
+ BuildMI(*BB, MI, dl, TII->get(CopyOpc))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(ARM::SP);
- MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
}
@@ -3893,7 +4310,8 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
// Narrowing shifts require an immediate right shift.
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
break;
- llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
default:
llvm_unreachable("unhandled vector shift");
@@ -4156,14 +4574,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
if (!Subtarget->hasV6Ops())
// Pre-v6 does not support unaligned mem access.
return false;
- else {
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- // FIXME: This is pretty conservative. Should we provide cmdline option to
- // control the behaviour?
- if (!Subtarget->isTargetDarwin())
- return false;
- }
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ // FIXME: This is pretty conservative. Should we provide cmdline option to
+ // control the behaviour?
+ if (!Subtarget->isTargetDarwin())
+ return false;
switch (VT.getSimpleVT().SimpleTy) {
default:
@@ -4619,7 +5036,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(0U, ARM::CCRRegisterClass);
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
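A user-level view of what this constraint change handles (illustration): a "cc" clobber in GCC/Clang inline assembly reaches the backend as ~{cc}, which now maps to the concrete CPSR register instead of register 0:

static int is_negative(int x) {
  int r;
  __asm__("cmp   %1, #0\n\t"
          "movlt %0, #1\n\t"
          "movge %0, #0"
          : "=r"(r) : "r"(x) : "cc");  // "cc" is CPSR on ARM
  return r;
}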
@@ -4669,7 +5086,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
@@ -4818,8 +5234,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
- Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
bool
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9c7517c88195..3a3866928a0e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -70,6 +70,8 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ TC_RETURN, // Tail call return pseudo.
+
THREAD_POINTER,
DYN_ALLOC, // Dynamic allocation on the stack.
@@ -133,6 +135,13 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
// Floating-point max and min:
FMAX,
FMIN
@@ -141,11 +150,12 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace ARM {
- /// getVMOVImm - If this is a build_vector of constants which can be
- /// formed by using a VMOV instruction of the specified element size,
- /// return the constant being splatted. The ByteSize field indicates the
- /// number of bytes of each element [1248].
- SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ /// getNEONModImm - If this is a valid vector constant for a NEON
+ /// instruction with a "modified immediate" operand (e.g., VMOV) of the
+ /// specified element size, return the encoded value for that immediate.
+ /// The ByteSize field indicates the number of bytes of each element [1248].
+ SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG);
/// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
/// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
@@ -189,9 +199,9 @@ namespace llvm {
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
- /// icmp immediate, that is the target has icmp instructions which can compare
- /// a register against the immediate without having to materialize the
- /// immediate into a register.
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
virtual bool isLegalICmpImmediate(int64_t Imm) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
@@ -232,7 +242,6 @@ namespace llvm {
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -282,7 +291,8 @@ namespace llvm {
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const;
- CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const;
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
@@ -303,6 +313,7 @@ namespace llvm {
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -327,18 +338,34 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index d487df16df5e..ac568e75ccc4 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -50,27 +50,23 @@ def VFPLdStMulFrm : Format<22>;
def VFPMiscFrm : Format<23>;
def ThumbFrm : Format<24>;
-
-def NEONFrm : Format<25>;
-def NEONGetLnFrm : Format<26>;
-def NEONSetLnFrm : Format<27>;
-def NEONDupFrm : Format<28>;
-
-def MiscFrm : Format<29>;
-def ThumbMiscFrm : Format<30>;
-
-def NLdStFrm : Format<31>;
-def N1RegModImmFrm : Format<32>;
-def N2RegFrm : Format<33>;
-def NVCVTFrm : Format<34>;
-def NVDupLnFrm : Format<35>;
-def N2RegVShLFrm : Format<36>;
-def N2RegVShRFrm : Format<37>;
-def N3RegFrm : Format<38>;
-def N3RegVShFrm : Format<39>;
-def NVExtFrm : Format<40>;
-def NVMulSLFrm : Format<41>;
-def NVTBLFrm : Format<42>;
+def MiscFrm : Format<25>;
+
+def NGetLnFrm : Format<26>;
+def NSetLnFrm : Format<27>;
+def NDupFrm : Format<28>;
+def NLdStFrm : Format<29>;
+def N1RegModImmFrm: Format<30>;
+def N2RegFrm : Format<31>;
+def NVCVTFrm : Format<32>;
+def NVDupLnFrm : Format<33>;
+def N2RegVShLFrm : Format<34>;
+def N2RegVShRFrm : Format<35>;
+def N3RegFrm : Format<36>;
+def N3RegVShFrm : Format<37>;
+def NVExtFrm : Format<38>;
+def NVMulSLFrm : Format<39>;
+def NVTBLFrm : Format<40>;
// Misc flags.
@@ -1653,17 +1649,17 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
opc, dt, asm, pattern>;
class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
opc, dt, asm, pattern>;
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
opc, dt, asm, pattern>;
// Vector Duplicate Lane (from scalar to all elements)
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 85f6b4019a8e..ba228ffac8ed 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -63,7 +63,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
void ARMInstrInfo::
reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
DebugLoc dl = Orig->getDebugLoc();
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index d4199d1267fd..4563ffea7b9c 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,7 +35,7 @@ public:
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index f3156d94693b..c73e204a26b3 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -53,6 +53,8 @@ def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -117,6 +119,9 @@ def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -858,13 +863,13 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
Pseudo, IIC_iALUi,
"adr$p\t$dst, #$label", []>;
+} // neverHasSideEffects
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
Pseudo, IIC_iALUi,
"adr$p\t$dst, #${label}_${id}", []> {
let Inst{25} = 1;
}
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -1026,6 +1031,74 @@ let isCall = 1,
}
}
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]>;
+
+ def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]>;
+
+ def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
+ }
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsThumb, IsNotDarwin]>;
+
+ def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsNotDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
+ }
+ }
+}
+
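For orientation, a hypothetical source-level case these definitions cover: with tail call optimization enabled, the sibcall below can be emitted as "b _bar @ TAILCALL" (TAILJMPd) in place of "bl _bar" followed by an epilogue and return:

int bar(int);
int foo(int x) {
  return bar(x + 1);  // eligible sibcall: matching conventions, no sret/varargs
}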
let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
@@ -1397,6 +1470,14 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
let Inst{25} = 0;
}
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm,
+ IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+}
+
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
@@ -2530,31 +2611,30 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ] in {
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
- "add\t$val, pc, #8\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
- "add\t$val, pc, #8\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+                                  "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
@@ -2621,6 +2701,24 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// TODO: add,sub,and, 3-instr forms?
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
// Direct calls
def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 197ec16eedea..a84315f73038 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -98,17 +98,8 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
// NEON operand definitions
//===----------------------------------------------------------------------===//
-def h8imm : Operand<i8> {
- let PrintMethod = "printHex8ImmOperand";
-}
-def h16imm : Operand<i16> {
- let PrintMethod = "printHex16ImmOperand";
-}
-def h32imm : Operand<i32> {
- let PrintMethod = "printHex32ImmOperand";
-}
-def h64imm : Operand<i64> {
- let PrintMethod = "printHex64ImmOperand";
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
}
//===----------------------------------------------------------------------===//
@@ -812,11 +803,6 @@ def DSubReg_f64_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;
-def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()),
- MVT::i32);
-}]>;
// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
@@ -2282,7 +2268,7 @@ def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
// For disassembly only.
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$dst, $src, #0">;
+ "$dst, $src, #0">;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -2834,73 +2820,70 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 1, *CurDAG);
+ return ARM::getNEONModImm(N, 1, true, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;
// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 2, *CurDAG);
+ return ARM::getNEONModImm(N, 2, true, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;
// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 4, *CurDAG);
+ return ARM::getNEONModImm(N, 4, true, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;
// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 8, *CurDAG);
+ return ARM::getNEONModImm(N, 8, true, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;
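
Each PatLeaf/SDNodeXForm pair above works the same way: the PatLeaf predicate accepts a build_vector only if getNEONModImm can encode it, and the xform reuses the same call to produce the instruction operand. A self-contained C++ analogy with dummy types standing in for SelectionDAG nodes (not the real API):

    #include <cstdint>

    struct Node { uint64_t SplatBits; };  // stand-in for a build_vector splat

    // Stand-in for getNEONModImm: non-null only if the splat fits the element.
    const Node *getModImm(const Node *N, unsigned ByteSize) {
      uint64_t Max = ByteSize >= 8 ? ~0ull : (1ull << (ByteSize * 8)) - 1;
      return N->SplatBits <= Max ? N : nullptr;
    }

    bool vmovImmMatches(const Node *N, unsigned Size) {   // PatLeaf predicate
      return getModImm(N, Size) != nullptr;
    }
    const Node *vmovImmXform(const Node *N, unsigned Size) { // SDNodeXForm
      return getModImm(N, Size);
    }
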
-// Note: Some of the cmode bits in the following VMOV instructions need to
-// be encoded based on the immed values.
-
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins h8imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins h8imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
-def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
- (ins h16imm:$SIMM), IIC_VMOVImm,
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
- (ins h16imm:$SIMM), IIC_VMOVImm,
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
-def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
- (ins h32imm:$SIMM), IIC_VMOVImm,
+def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
- (ins h32imm:$SIMM), IIC_VMOVImm,
+def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins h64imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins h64imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
} // isReMaterializable
@@ -3122,17 +3105,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
-def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
- (INSERT_SUBREG QPR:$src,
- (i64 (EXTRACT_SUBREG QPR:$src,
- (DSubReg_f64_reg imm:$lane))),
- (DSubReg_f64_other_reg imm:$lane))>;
-def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
- (INSERT_SUBREG QPR:$src,
- (f64 (EXTRACT_SUBREG QPR:$src,
- (DSubReg_f64_reg imm:$lane))),
- (DSubReg_f64_other_reg imm:$lane))>;
-
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
"vmovn", "i", int_arm_neon_vmovn>;
@@ -3319,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
- DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
- DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
- DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
} // hasExtraSrcRegAllocReq = 1
// VTBX : Vector Table Extension
@@ -3348,23 +3314,18 @@ let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
- DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+ "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+ "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
+ "$orig = $dst", []>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
- "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+ "$orig = $dst", []>;
} // hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
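
The VTBL/VTBX defs above drop their ISel patterns; selection of the vtbl/vtbx intrinsics presumably moves elsewhere in this commit (the diffstat also touches NEONPreAllocPass.cpp, though that is not verified here). For reference, a scalar C++ model of what a two-register VTBL computes, using the architectural rule that out-of-range indices yield zero:

    #include <cstdint>

    // Semantics of VTBL with a 2-register (16-byte) table; illustrative only.
    void vtbl2(const uint8_t Tbl[16], const uint8_t Idx[8], uint8_t Out[8]) {
      for (int i = 0; i < 8; ++i)
        Out[i] = Idx[i] < 16 ? Tbl[Idx[i]] : 0;  // out of range -> 0
    }
    // VTBX differs only in leaving Out[i] unchanged for out-of-range indices.
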
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 40f924b679fa..bc0790dccbb5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -894,11 +894,11 @@ def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p\t$dst, #$label", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+} // neverHasSideEffects
def tLEApcrelJT : T1I<(outs tGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -923,18 +923,18 @@ let isCall = 1,
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
+ isBarrier = 1 in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
@@ -1037,7 +1037,8 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
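
The tLDRpci_pic pseudo materializes a global's address PIC-style: the constant pool entry holds the global minus (label address + pc adjustment), and the trailing "add $dst, pc" rebuilds the absolute address. A one-line model; the Thumb pc adjustment of 4 (8 for ARM) is stated here as an assumption of the sketch:

    #include <cstdint>

    uintptr_t picAddress(uintptr_t PoolEntry /* GV - (Label + Adj) */,
                         uintptr_t LabelPC, uintptr_t Adj /* 4 on Thumb */) {
      return PoolEntry + LabelPC + Adj;  // == GV
    }
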
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index b91c089fa5db..4692f2a42133 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -637,8 +637,7 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode GPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -649,8 +648,7 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -661,8 +659,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// SXTB16 and UXTB16 do not need the .w qualifier.
-multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>,
@@ -689,9 +687,9 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
@@ -787,6 +785,7 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
+} // neverHasSideEffects
def t2LEApcrelJT : T2XI<(outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
"adr$p.w\t$dst, #${label}_${id}", []> {
@@ -798,7 +797,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
-} // neverHasSideEffects
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -1330,7 +1328,7 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">;
+defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">;
defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
@@ -1347,13 +1345,13 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16",
+defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>;
+ (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>;
+ (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -2389,37 +2387,36 @@ let isCall = 1,
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ] in {
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
@@ -2529,6 +2526,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// IT block
+let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, Size2Bytes, IIC_iALUx,
"it$mask\t$cc", "", []> {
@@ -2691,7 +2689,8 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 54474cfe2e29..84c23e1a784c 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -255,25 +255,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
-def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index ff332b7ee15b..f5d9effeb9a5 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -143,7 +143,8 @@ namespace llvm {
JumpTableId2AddrMap[JTI] = Addr;
}
- /// getPCLabelAddr - Retrieve the address of the PC label of the specified id.
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
intptr_t getPCLabelAddr(unsigned Id) const {
DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
assert(I != PCLabelMap.end());
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 8585c1e50105..f80e316d23e8 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -74,11 +74,14 @@ namespace {
private:
struct MemOpQueueEntry {
int Offset;
+ unsigned Reg;
+ bool isKill;
unsigned Position;
MachineBasicBlock::iterator MBBI;
bool Merged;
- MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
- : Offset(o), Position(p), MBBI(i), Merged(false) {}
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
@@ -128,30 +131,30 @@ namespace {
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::LDM;
case ARM::STR:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::STM;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::t2LDM;
case ARM::t2STRi8:
case ARM::t2STRi12:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::t2STM;
case ARM::VLDRS:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMS;
case ARM::VSTRS:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMS;
case ARM::VLDRD:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMD;
case ARM::VSTRD:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
@@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- int Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
- SmallVector<std::pair<unsigned, bool>, 8> Regs;
const unsigned insertPos = memOps[insertAfter].Position;
+
+ SmallSet<unsigned, 4> UnavailRegs;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0; i < memOpsBegin; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ if (memOps[i].Merged)
+ UnavailRegs.insert(Reg);
+ else {
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+ }
+ for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- bool isKill = MO.isKill();
+ unsigned Reg = memOps[i].Reg;
+ if (UnavailRegs.count(Reg))
+ // The register is killed earlier and it's not easy or even possible to
+ // update the kill marker on already-merged instructions. Abort.
+ return;
// If we are inserting the merged operation after an unmerged operation that
// uses the same register, make sure to transfer any kill flag.
- for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
- if (memOps[j].Position<insertPos) {
- const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
- if (MOJ.getReg() == Reg && MOJ.isKill())
- isKill = true;
- }
-
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- Loc++;
+ ++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
Pred, PredReg, Scratch, dl, Regs))
return;
@@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB,
Merges.push_back(prior(Loc));
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any unmerged memops that come before insertPos.
- if (Regs[i-memOpsBegin].second)
- for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
- if (memOps[j].Position<insertPos) {
- MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
- if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
- MOJ.setIsKill(false);
- }
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ memOps[j].MBBI->getOperand(0).setIsKill(false);
+ }
+ }
MBB.erase(memOps[i].MBBI);
memOps[i].Merged = true;
}
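
The rewritten MergeOpsUpdate replaces a quadratic rescan of operand kill flags with three containers: UnavailRegs (killed by an already-merged memop, so the flag cannot be repaired; abort), KilledRegs (killed by an unmerged memop before the insertion point), and Killer (which memop holds the flag to clear). A condensed, self-contained sketch of that bookkeeping with std containers in place of SmallSet/DenseMap:

    #include <map>
    #include <set>

    struct MemOp { unsigned Reg; bool IsKill; bool Merged; unsigned Position; };

    void collectKills(const MemOp *Ops, unsigned Begin, unsigned InsertPos,
                      std::set<unsigned> &Unavail, std::set<unsigned> &Killed,
                      std::map<unsigned, unsigned> &Killer) {
      for (unsigned i = 0; i < Begin; ++i) {
        if (Ops[i].Position >= InsertPos || !Ops[i].IsKill)
          continue;
        if (Ops[i].Merged)
          Unavail.insert(Ops[i].Reg);  // flag lives on a merged inst: give up
        else {
          Killed.insert(Ops[i].Reg);   // unmerged kill before the insert point
          Killer[Ops[i].Reg] = i;      // remember whose flag to clear later
        }
      }
    }
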
@@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
// Try merging with the previous instruction.
- if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isAM4) {
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
// Try merging with the next instruction.
- if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (isAM4) {
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Try merging with the previous instruction.
- if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
@@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
}
// Try merging with the next instruction.
- if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
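
Both merge routines gain the same guard: when stepping to the previous or next instruction, skip over DBG_VALUEs so that compiling with debug info cannot change which merges fire. The pattern, sketched over a std::list stand-in for a machine basic block:

    #include <list>

    struct Instr { bool IsDebugValue; };

    std::list<Instr>::iterator
    prevRealInstr(std::list<Instr> &MBB, std::list<Instr>::iterator I) {
      if (I == MBB.begin())
        return I;
      --I;                                      // prior(MBBI)
      while (I != MBB.begin() && I->IsDebugValue)
        --I;                                    // debug values don't count
      return I;
    }
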
@@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
- if (MI->hasOneMemOperand()) {
- const MachineMemOperand *MMO = *MI->memoperands_begin();
+ // When no memory operands are present, conservatively assume the access is
+ // unaligned, volatile, and unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
- // Don't touch volatile memory accesses - we may be changing their order.
- if (MMO->isVolatile())
- return false;
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
- // not.
- if (MMO->getAlignment() < 4)
- return false;
- }
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
// str <undef> could probably be eliminated entirely, but for now we just want
// to avoid making a mess of it.
@@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
return false;
+ MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
@@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
+ NewBBI = llvm::prior(MBBI);
} else {
// Split into two instructions.
assert((!isT2 || !OffReg) &&
@@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
- // If the two source operands are the same, the kill marker is probably
- // on the first one. e.g.
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
// t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
EvenDeadKill = false;
OddDeadKill = true;
@@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
++NumSTRD2STR;
}
- MBBI = prior(MBBI);
MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
}
return false;
}
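
The NewBBI threading above is an iterator-safety fix: the position of the replacement instruction(s) is captured before the old LDRD/STRD is erased, the scan resumes there, and the function now reports the rewrite. A guess at the shape of the fix, reduced to a std::list sketch:

    #include <list>

    struct MI { int Opcode; };

    // Capture the resume point before erasing; return true so the caller
    // knows the block changed. (Motivation inferred from the diff, not the
    // surrounding file.)
    bool replaceAndResume(std::list<MI> &MBB, std::list<MI>::iterator &MBBI,
                          std::list<MI>::iterator NewBBI) {
      MBB.erase(MBBI);
      MBBI = NewBBI;
      return true;
    }
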
@@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (isMemOp) {
int Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
@@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
if (Clobber) {
@@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
I != E; ++I) {
if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
break;
} else if (Offset == I->Offset) {
@@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
- if (Advance) {
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reached the end of the block; try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
++Position;
++MBBI;
if (MBBI == E)
@@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
// some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
- if (MemOps.count(&*I))
+ if (I->isDebugValue() || MemOps.count(&*I))
continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
@@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
std::sort(Ops.begin(), Ops.end(), OffsetCompare());
// The loads / stores of the same base are in order. Scan them from first to
- // last and check for the followins:
+ // last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
while (Ops.size() > 1) {
@@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end() && MemOps.count(InsertPos))
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
++InsertPos;
// If we are moving a pair of loads / stores, see if it makes sense
@@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
break;
}
- MI2LocMap[MI] = Loc++;
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 013427635592..7e57a1ca5576 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -97,7 +100,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -108,7 +112,8 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -229,6 +234,9 @@ public:
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
};
} // End llvm namespace
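
A minimal usage sketch for the new flag; the idea that Thumb2ITBlockPass sets it after inserting IT instructions is an assumption consistent with the diffstat, not verified against that file here:

    struct ARMFunctionInfoSketch {
      bool HasITBlocks = false;
      bool hasITBlocks() const { return HasITBlocks; }
      void setHasITBlocks(bool h) { HasITBlocks = h; }
    };
    // e.g. AFI->setHasITBlocks(true) when the IT-insertion pass fires,
    // queried later by passes that must respect IT-block structure.
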
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 6beca8b9199b..d020f3c74bde 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -153,11 +153,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Pseudo 256-bit registers to represent pairs of Q registers. These should
// never be present in the emitted code.
-// These are used for NEON load / store instructions, e.g. vld4, vst3.
-// NOTE: It's possible to define more QQ registers since technical the
-// starting D register number doesn't have to be multiple of 4. e.g.
-// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register
-// stuffs very messy.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
let SubRegIndices = [qsub_0, qsub_1] in {
let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
(ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
@@ -183,7 +183,8 @@ let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
(ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1),
(ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
(ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
- (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in {
+ (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in
+{
def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
}
@@ -196,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
}
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-
-def FPSCR : ARMReg<1, "fpscr">;
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
// Register classes.
//
@@ -348,6 +349,73 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
}];
}
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // R9 is available.
+ static const unsigned ARM_GPR_R9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R9, ARM::R12 };
+ // R9 is not available.
+ static const unsigned ARM_GPR_NOR9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_GPR_AO_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC;
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_NOR9_TC;
+ else
+ return ARM_GPR_R9_TC;
+ } else
+ // R9 is either callee-saved or reserved; can't use it.
+ return ARM_GPR_NOR9_TC;
+ }
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+
+ if (Subtarget.isThumb1Only()) {
+ I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+ return I;
+ }
+
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.isR9Reserved())
+ I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ else
+ I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
+ } else
+ // R9 is either callee-saved or reserved; can't use it.
+ I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ return I;
+ }
+ }];
+}
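
Condensed, the allocation-order policy encoded in the MethodBodies above is a three-way choice. A standalone C++ restatement (register numbers are illustrative stand-ins for the ARM:: enums):

    enum class TCMode { Thumb1, DarwinWithR9, NoR9 };

    const unsigned *tailCallAllocOrder(TCMode M, unsigned &N) {
      static const unsigned Thumb1[] = {0, 1, 2, 3};         // lo regs only
      static const unsigned WithR9[] = {0, 1, 2, 3, 9, 12};  // R9 usable
      static const unsigned NoR9[]   = {0, 1, 2, 3, 12};     // R9 off-limits
      switch (M) {
      case TCMode::Thumb1:       N = 4; return Thumb1;
      case TCMode::DarwinWithR9: N = 6; return WithR9;
      default:                   N = 5; return NoR9;
      }
    }
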
+
+
// Scalar single precision floating point register class.
def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
@@ -479,4 +547,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index bbfc0b2b4744..282abca98803 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1,10 +1,10 @@
//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM Cortex A8 processors.
@@ -32,50 +32,50 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
//
// Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
// Unary Instructions that produce a result
- InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
//
// Move instructions, conditional
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
// Integer multiply pipeline
// Result written in E5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
//
InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
-
+
// Integer load pipeline
//
// loads have an extra cycle of latency, but are fully pipelined
@@ -166,7 +166,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>]>,
-
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
@@ -276,14 +276,14 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// Single-precision FP Load
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Load
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -292,7 +292,7 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// FP Load Multiple
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -301,14 +301,14 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// Single-precision FP Store
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Store
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -317,7 +317,7 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// FP Store Multiple
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -329,35 +329,35 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// VLD1
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// VLD2
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
//
// VLD3
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
//
// VLD4
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
@@ -600,7 +600,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -610,9 +610,9 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 75320d929952..df2f896a8d4b 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1,10 +1,10 @@
//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM Cortex A9 processors.
@@ -16,7 +16,6 @@
// Reference Manual".
//
// Functional units
-def A9_Issue : FuncUnit; // issue
def A9_Pipe0 : FuncUnit; // pipeline 0
def A9_Pipe1 : FuncUnit; // pipeline 1
def A9_LSPipe : FuncUnit; // LS pipe
@@ -27,7 +26,121 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side
// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
//
def CortexA9Itineraries : ProcessorItineraries<
- [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+ [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
+ // Two fully-pipelined integer ALU pipelines
+ // FIXME: There are no operand latencies for these instructions at all!
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
+ InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
+ InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple
+ InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>]>,
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+
// VFP and NEON share the same register file. This means that every VFP
// instruction should wait for full completion of the consecutive NEON
// instruction and vice-versa. We model this behavior with two artificial FUs:
@@ -39,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// register file writeback!).
// Every NEON instruction does the same but with FUs swapped.
//
- // Since the reserved FU cannot be acquired this models precisly "cross-domain"
- // stalls.
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
// VFP
// Issue through integer pipeline, and execute in NEON unit.
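
The Required/Reserved trick described above can be pictured as two busy-until counters: a VFP op must acquire the VFP-side unit (Required) and blocks the NEON-side unit for its full latency (Reserved), and NEON ops do the mirror image. A toy C++ model, purely illustrative of the scheduling effect:

    #include <algorithm>

    struct FU { unsigned BusyUntil = 0; };

    // Issue a VFP op: stall until the Required unit frees, then reserve the
    // opposite domain for the op's latency so cross-domain issue stalls.
    unsigned issueVFP(FU &DRegsVFP, FU &DRegsN, unsigned Now, unsigned Lat) {
      unsigned Start = std::max(Now, DRegsVFP.BusyUntil);
      DRegsN.BusyUntil = Start + Lat;  // Reserved: blocks the NEON side
      return Start;
    }
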
@@ -48,21 +161,21 @@ def CortexA9Itineraries : ProcessorItineraries<
// FP Special Register to Integer Register File Move
InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Unary
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
@@ -70,124 +183,124 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Single to Half FP Convert
InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Half to Single FP Convert
InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<6, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<7, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<9, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<10, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<16, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<26, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<18, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<13, [A9_NPipe]>], [17, 1]>,
+ InstrStage<1, [A9_Pipe1]>,
+ InstrStage<13, [A9_NPipe]>], [17, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<33, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<28, [A9_NPipe]>], [32, 1]>,
//
@@ -195,92 +308,79 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
//
// Single-precision FP Load
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-precision FP Load
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// FP Load Multiple
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Store
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-precision FP Store
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// FP Store Multiple
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
// NEON
// Issue through integer pipeline, and execute in NEON unit.
- // FIXME: Neon pipeline and LdSt unit are multiplexed.
+ // FIXME: Neon pipeline and LdSt unit are multiplexed.
// Add some syntactic sugar to model this!
// VLD1
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// VLD2
@@ -288,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
//
// VLD3
@@ -298,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
//
// VLD4
@@ -308,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
@@ -318,121 +415,120 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-register Integer Unary
InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2]>,
//
// Quad-register Integer Unary
InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2]>,
//
// Double-register Integer Q-Unary
InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Quad-register Integer Q-Unary
InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double-register Integer Binary
InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Binary
InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Double-register Integer Subtract
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
//
// Quad-register Integer Subtract
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
//
// Double-register Integer Shift
InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
//
// Quad-register Integer Shift
InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Double-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
//
// Quad-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
//
// Double-register Integer Subtract (4 cycle)
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Subtract (4 cycle)
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
//
@@ -440,7 +536,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Count
@@ -449,35 +545,35 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
//
// Double-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Double-register Integer Pair Add Long
InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
//
// Quad-register Integer Pair Add Long
InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
//
@@ -485,14 +581,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
//
// Quad-register Integer Multiply (.8, .16)
InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
//
@@ -500,56 +596,56 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
//
// Quad-register Integer Multiply (.32)
InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
//
// Double-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
//
// Double-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
//
// Quad-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
//
// Quad-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
//
// Move Immediate
InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3]>,
//
// Double-register Permute Move
InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>], [2, 1]>,
//
// Quad-register Permute Move
@@ -558,42 +654,42 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1]>,
//
// Integer to Single-precision Move
InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
//
// Integer to Lane Move
InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
//
@@ -601,7 +697,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 2]>,
//
// Quad-register FP Unary
@@ -610,7 +706,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2]>,
//
// Double-register FP Binary
@@ -619,7 +715,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
//
// Quad-register FP Binary
@@ -630,14 +726,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
//
// Double-register FP Multiply-Accumulate
InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register FP Multiply-Accumulate
@@ -646,28 +742,28 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
//
// Double-register Reciprocal Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
//
// Quad-register Reciprocal Step
InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
//
// Double-register Permute
InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
//
// Quad-register Permute
@@ -676,7 +772,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
//
// Quad-register Permute (3 cycle issue)
@@ -685,7 +781,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
//
@@ -693,57 +789,57 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
//
// Quad-register VEXT
InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
//
// VTB
InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
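
The Required/Reserved pairing used throughout these itineraries is essentially a scoreboard trick: a VFP instruction acquires the VFP domain units (Required) while holding the NEON domain busy (Reserved) for its full latency, and every NEON instruction does the converse, so an instruction from the other domain cannot issue until the reservation drains. A standalone sketch of that behaviour, in plain C++ rather than LLVM's actual hazard machinery, under the simplifying assumption of one unit per domain:

    #include <cstdio>

    struct Scoreboard {
      int VFPBusyUntil, NEONBusyUntil;
      Scoreboard() : VFPBusyUntil(0), NEONBusyUntil(0) {}

      // Issue an op: wait for our own (Required) domain, then hold both our
      // domain and the other (Reserved) one until the latency drains.
      int issue(bool IsVFP, int Cycle, int Latency) {
        int &Mine  = IsVFP ? VFPBusyUntil  : NEONBusyUntil;
        int &Other = IsVFP ? NEONBusyUntil : VFPBusyUntil;
        int Start = Cycle < Mine ? Mine : Cycle;
        Mine  = Start + Latency;
        Other = Start + Latency;  // the Reserved stage: held, never acquired
        return Start;
      }
    };

    int main() {
      Scoreboard SB;
      int A = SB.issue(true,  0, 5);  // VFP op, 5-cycle reservation
      int B = SB.issue(false, 1, 2);  // NEON op wants cycle 1
      std::printf("VFP at %d, NEON at %d\n", A, B);  // "VFP at 0, NEON at 5"
      return 0;
    }

Running it prints "VFP at 0, NEON at 5": the NEON op wanted cycle 1 but stalls until the VFP reservation expires, which is exactly the cross-domain stall the reserved DRegs units encode.
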
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index f813022d8741..08b560cc0c2f 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -16,7 +16,7 @@
// Functional Units
def V6_Pipe : FuncUnit; // pipeline
-// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
//
def ARMV6Itineraries : ProcessorItineraries<
[V6_Pipe], [
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b4a9252909bc..09203f9304df 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -60,8 +60,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
- std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
+ std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ "v128:32:128-v64:32:64-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
}
@@ -74,9 +76,11 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32")),
TLInfo(*this),
TSInfo(*this) {
}
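
For reference, the new "v128:64:128" and "v64:32:64"-style entries give vector types a size:ABI:preferred alignment triple in bits. A minimal sketch of what that buys, assuming the TargetData API of this LLVM revision (the string is the AAPCS variant from the hunk above):

    #include "llvm/LLVMContext.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // AAPCS string from the hunk above: 128-bit vectors get 64-bit ABI
      // alignment but a 128-bit preferred alignment.
      TargetData TD("e-p:32:32-f64:64:64-i64:64:64-v128:64:128-v64:64:64-n32");
      const VectorType *V4f32 = VectorType::get(Type::getFloatTy(Ctx), 4);
      unsigned ABI  = TD.getABITypeAlignment(V4f32);   // 8 bytes
      unsigned Pref = TD.getPrefTypeAlignment(V4f32);  // 16 bytes
      return (ABI == 8 && Pref == 16) ? 0 : 1;
    }
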
@@ -98,6 +102,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
+
return true;
}
@@ -115,21 +120,20 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- return true;
-}
-
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
}
-
- if (Subtarget.isThumb2()) {
+ if (Subtarget.isThumb2())
PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2())
PM.add(createThumb2SizeReductionPass());
- }
PM.add(createARMConstantIslandPass());
return true;
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bfa89c4a4696..8415d1ad8827 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -425,7 +425,7 @@ bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
const AsmToken &NextTok = Parser.getTok();
if (NextTok.isNot(AsmToken::EndOfStatement)) {
if (NextTok.isNot(AsmToken::Comma))
- return Error(NextTok.getLoc(), "',' expected");
+ return Error(NextTok.getLoc(), "',' expected");
Parser.Lex(); // Eat comma token.
if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
@@ -488,7 +488,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
const AsmToken &Tok = Parser.getTok();
if (ParseShift(ShiftType, ShiftAmount, E))
- return Error(Tok.getLoc(), "shift expected");
+ return Error(Tok.getLoc(), "shift expected");
OffsetRegShifted = true;
}
}
@@ -665,7 +665,6 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.push_back(Op.take());
- SMLoc Loc = Parser.getTok().getLoc();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
@@ -763,15 +762,10 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
if (Tok.isNot(AsmToken::Identifier))
return Error(L, "unexpected token in .syntax directive");
const StringRef &Mode = Tok.getString();
- bool unified_syntax;
- if (Mode == "unified" || Mode == "UNIFIED") {
+ if (Mode == "unified" || Mode == "UNIFIED")
Parser.Lex();
- unified_syntax = true;
- }
- else if (Mode == "divided" || Mode == "DIVIDED") {
+ else if (Mode == "divided" || Mode == "DIVIDED")
Parser.Lex();
- unified_syntax = false;
- }
else
return Error(L, "unrecognized syntax mode in .syntax directive");
@@ -791,15 +785,10 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
if (Tok.isNot(AsmToken::Integer))
return Error(L, "unexpected token in .code directive");
int64_t Val = Parser.getTok().getIntVal();
- bool thumb_mode;
- if (Val == 16) {
+ if (Val == 16)
Parser.Lex();
- thumb_mode = true;
- }
- else if (Val == 32) {
+ else if (Val == 32)
Parser.Lex();
- thumb_mode = false;
- }
else
return Error(L, "invalid operand to .code directive");
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index d95efdb80943..6a40cf3602e9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -175,23 +175,8 @@ namespace {
raw_ostream &O);
void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
-
- void printHex8ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
- }
- void printHex16ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
- }
- void printHex32ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
- }
- void printHex64ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
- }
+ void printNEONModImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
@@ -322,7 +307,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
O << '{'
- << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
+ << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi)
<< '}';
} else if (Modifier && strcmp(Modifier, "lane") == 0) {
unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
@@ -617,8 +602,12 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO2.getImm()) {
+ unsigned Align = MO2.getImm();
+ assert((Align == 8 || Align == 16 || Align == 32) &&
+ "unexpected NEON load/store alignment");
+ Align <<= 3;
// FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << MO2.getImm();
+ O << ", :" << Align;
}
O << "]";
}
@@ -1039,6 +1028,14 @@ void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
}
}
+void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
+
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
@@ -1064,20 +1061,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printOperand(MI, OpNum, O);
return false;
case 'Q':
- if (TM.getTargetData()->isLittleEndian())
- break;
- // Fallthrough
case 'R':
- if (TM.getTargetData()->isBigEndian())
- break;
- // Fallthrough
- case 'H': // Write second word of DI / DF reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNum).isReg() ||
- OpNum+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNum+1).isReg())
- return true;
- ++OpNum; // Return the high-part.
+ case 'H':
+ report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!");
+ return true;
}
}
@@ -1384,11 +1371,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
} else if (MO.isGlobal()) {
MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
const MCSymbolRefExpr *SymRef1 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_LO16, OutContext);
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_LO16, OutContext);
const MCSymbolRefExpr *SymRef2 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_HI16, OutContext);
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_HI16, OutContext);
V1 = MCOperand::CreateExpr(SymRef1);
V2 = MCOperand::CreateExpr(SymRef2);
} else {
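
The 'Q', 'R', and 'H' modifiers name the low and high words of a 64-bit register pair in GCC-style inline asm. A hypothetical snippet (not from this patch) that the backend now rejects with a fatal error rather than risking emission of the wrong half:

    // Hypothetical user code, not from this patch.
    void store_pair(unsigned long long V, unsigned *P) {
      asm volatile("str %Q0, [%1]\n\t"   // low word of the 64-bit pair
                   "str %R0, [%1, #4]"   // high word
                   :
                   : "r"(V), "r"(P)
                   : "memory");
    }
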
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 2b94b76087e7..170819ad4f06 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -779,22 +779,10 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
O << '#' << MI->getOperand(OpNum).getImm();
}
-void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
-}
-
-void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
-}
-
-void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
-}
-
-void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index be0b7c1eb027..ddf5047793d2 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -104,10 +104,7 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
// FIXME: Implement.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 29e66e13b168..0df34666b959 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen
NEONPreAllocPass.cpp
Thumb1InstrInfo.cpp
Thumb1RegisterInfo.cpp
+ Thumb2HazardRecognizer.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index adb7795a746c..a07ff2832aa7 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -34,7 +34,7 @@
/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
/// defined with two components:
///
-/// def pred { // Operand PredicateOperand
+/// def pred { // Operand PredicateOperand
/// ValueType Type = OtherVT;
/// string PrintMethod = "printPredicateOperand";
/// string AsmOperandLowerMethod = ?;
@@ -54,7 +54,7 @@
///
/// For the Defs part, in the simple case of only cc_out:$s, we have:
///
-/// def cc_out { // Operand OptionalDefOperand
+/// def cc_out { // Operand OptionalDefOperand
/// ValueType Type = OtherVT;
/// string PrintMethod = "printSBitModifierOperand";
/// string AsmOperandLowerMethod = ?;
@@ -765,7 +765,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|| Opcode == ARM::SMC || Opcode == ARM::SVC) &&
"Unexpected Opcode");
- assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected");
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected");
int Imm32 = 0;
if (Opcode == ARM::SMC) {
@@ -1106,7 +1106,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
(OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+2].RegClass == 0) &&
+ (OpInfo[OpIdx+2].RegClass < 0) &&
"Expect 3 reg operands");
// Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
@@ -1201,7 +1201,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == 0) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -1323,7 +1323,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == 0) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -1494,7 +1494,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// If there is still an operand info left which is an immediate operand, add
// an additional imm5 LSL/ASR operand.
- if (ThreeReg && OpInfo[OpIdx].RegClass == 0
+ if (ThreeReg && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
@@ -1540,7 +1540,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// If there is still an operand info left which is an immediate operand, add
// an additional rotate immediate operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 2-bit rotate field Inst{11-10}.
unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
@@ -1725,7 +1725,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
"Tied to operand expected");
MI.addOperand(MI.getOperand(0));
- assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() &&
+ assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef() && "Imm operand expected");
MI.addOperand(MCOperand::CreateImm(fbits));
@@ -1984,7 +1984,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
// Extract/decode the f64/f32 immediate.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// The asm syntax specifies the before-expanded <imm>.
// Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
@@ -2077,42 +2077,12 @@ static unsigned decodeLaneIndex(uint32_t insn) {
// imm3 = Inst{18-16}, imm4 = Inst{3-0}
// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
+ unsigned char op = (insn >> 5) & 1;
unsigned char cmode = (insn >> 8) & 0xF;
unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
((insn >> 16) & 7) << 4 |
(insn & 0xF);
- uint64_t Imm64 = 0;
-
- switch (esize) {
- case ESize8:
- Imm64 = Imm8;
- break;
- case ESize16:
- Imm64 = Imm8 << 8*(cmode >> 1 & 1);
- break;
- case ESize32: {
- if (cmode == 12)
- Imm64 = (Imm8 << 8) | 0xFF;
- else if (cmode == 13)
- Imm64 = (Imm8 << 16) | 0xFFFF;
- else {
- // Imm8 to be shifted left by how many bytes...
- Imm64 = Imm8 << 8*(cmode >> 1 & 3);
- }
- break;
- }
- case ESize64: {
- for (unsigned i = 0; i < 8; ++i)
- if ((Imm8 >> i) & 1)
- Imm64 |= (uint64_t)0xFF << 8*i;
- break;
- }
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-
- return Imm64;
+ return (op << 12) | (cmode << 8) | Imm8;
}
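
decodeN1VImm no longer expands the immediate itself; it just repacks op/cmode/imm8 into a 13-bit value and leaves expansion to ARM_AM::decodeNEONModImm. A sketch of what that expansion looks like, reconstructed from the switch deleted above rather than from the real ARM_AM helper (the cmode == 15 floating-point case is omitted):

    #include <cassert>
    #include <cstdint>

    // Sketch only: mirrors the deleted esize switch, keyed off cmode/op as
    // in Table A7-15. EltBits is the element size the constant splats at.
    static uint64_t expandNEONModImm(unsigned Packed, unsigned &EltBits) {
      unsigned Imm8  = Packed & 0xff;
      unsigned CMode = (Packed >> 8) & 0xf;
      unsigned Op    = (Packed >> 12) & 1;
      if (CMode < 8) {                 // 32-bit, imm8 shifted by 0/8/16/24
        EltBits = 32;
        return (uint64_t)Imm8 << (8 * ((CMode >> 1) & 3));
      }
      if (CMode < 12) {                // 16-bit, imm8 shifted by 0/8
        EltBits = 16;
        return (uint64_t)Imm8 << (8 * ((CMode >> 1) & 1));
      }
      if (CMode == 12) { EltBits = 32; return ((uint64_t)Imm8 << 8)  | 0xff; }
      if (CMode == 13) { EltBits = 32; return ((uint64_t)Imm8 << 16) | 0xffff; }
      if (CMode == 14 && Op == 0) { EltBits = 8; return Imm8; }
      assert(CMode == 14 && Op == 1 && "float case (cmode 15) not sketched");
      EltBits = 64;                    // each imm8 bit becomes a full byte
      uint64_t Val = 0;
      for (unsigned i = 0; i != 8; ++i)
        if ((Imm8 >> i) & 1)
          Val |= (uint64_t)0xff << (8 * i);
      return Val;
    }
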
// A8.6.339 VMUL, VMULL (by scalar)
@@ -2303,7 +2273,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
@@ -2320,7 +2290,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
RegClass = OpInfo[OpIdx].RegClass;
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, RegClass, Rd,
UseDRegPair(Opcode))));
@@ -2329,7 +2299,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
++OpIdx;
@@ -2340,7 +2310,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// possible TIED_TO DPR/QPR's (ignored), then possible lane index.
RegClass = OpInfo[0].RegClass;
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, RegClass, Rd,
UseDRegPair(Opcode))));
@@ -2355,7 +2325,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
@@ -2366,7 +2336,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
}
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 &&
"Tied to operand expected");
MI.addOperand(MCOperand::CreateReg(0));
@@ -2374,7 +2344,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
++OpIdx;
@@ -2438,7 +2408,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
assert(NumOps >= 2 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == 0) &&
+ (OpInfo[1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
// Qd/Dd = Inst{22:15-12} => NEON Rd
@@ -2552,7 +2522,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
}
// Add the imm operand, if required.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
unsigned imm = 0xFFFFFFFF;
@@ -2632,7 +2602,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeNEONRm(insn))));
++OpIdx;
- assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected");
+ assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
// Add the imm operand.
@@ -2762,7 +2732,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Add the imm operand.
unsigned Imm = 0;
@@ -2869,15 +2839,9 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
-static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- assert(0 && "Unreachable code!");
- return false;
-}
-
// Vector Get Lane (move scalar to ARM core register) Instructions.
// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
-static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
@@ -2887,7 +2851,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass == 0 &&
+ OpInfo[2].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
ElemSize esize =
@@ -2911,7 +2875,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Set Lane (move ARM core register to scalar) Instructions.
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
-static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
@@ -2923,7 +2887,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpInfo[1].RegClass == ARM::DPRRegClassID &&
TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
OpInfo[2].RegClass == ARM::GPRRegClassID &&
- OpInfo[3].RegClass == 0 &&
+ OpInfo[3].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
ElemSize esize =
@@ -2950,7 +2914,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Duplicate Instructions (from ARM core register to all elements).
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
-static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -3090,13 +3054,6 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
}
-static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- assert(0 && "Unexpected thumb misc. instruction!");
- return false;
-}
-
/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
/// We divide the disassembly task into different categories, with each one
/// corresponding to a specific instruction encoding format. There could be
@@ -3128,12 +3085,10 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleVFPLdStMulFrm,
&DisassembleVFPMiscFrm,
&DisassembleThumbFrm,
- &DisassembleNEONFrm,
- &DisassembleNEONGetLnFrm,
- &DisassembleNEONSetLnFrm,
- &DisassembleNEONDupFrm,
&DisassembleMiscFrm,
- &DisassembleThumbMiscFrm,
+ &DisassembleNGetLnFrm,
+ &DisassembleNSetLnFrm,
+ &DisassembleNDupFrm,
// VLD and VST (including one lane) Instructions.
&DisassembleNLdSt,
@@ -3233,7 +3188,8 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
// a pair of TargetOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
{
// If we are inside an IT block, get the IT condition bits maintained via
// ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
@@ -3265,7 +3221,8 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
// a pair of TargetOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
{
// If we are inside an IT block, get the IT condition bits maintained via
// ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
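
The recurring RegClass == 0 to RegClass < 0 rewrite across this file reflects a convention change: non-register operands now carry a negative sentinel in TargetOperandInfo::RegClass, freeing class number 0 to denote a real register class. The predicate the asserts keep restating, as a sketch assumed from the checks above rather than a verbatim LLVM helper:

    #include "llvm/Target/TargetInstrDesc.h"  // TargetOperandInfo lives here
    using namespace llvm;

    // Assumed helper, written from the asserts above; not an LLVM API.
    static bool isPureImmOperand(const TargetOperandInfo &OpInfo) {
      return OpInfo.RegClass < 0 &&
             !OpInfo.isPredicate() && !OpInfo.isOptionalDef();
    }
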
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index b1d90df34177..7d21256a14f9 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
/// Various utilities for checking the target specific flags.
/// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(unsigned TSFlags) {
+static inline bool isUnaryDP(uint64_t TSFlags) {
return (TSFlags & ARMII::UnaryDP);
}
/// This four-bit field describes the addressing mode used.
/// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(unsigned TSFlags) {
+static inline unsigned getAddrMode(uint64_t TSFlags) {
return (TSFlags & ARMII::AddrModeMask);
}
/// {IndexModePre, IndexModePost}
/// Only valid for load and store ops.
/// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(unsigned TSFlags) {
+static inline unsigned getIndexMode(uint64_t TSFlags) {
return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
}
/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(unsigned TSFlags) {
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
return (TSFlags & ARMII::IndexModeMask) != 0;
}
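
Widening these helpers from unsigned to uint64_t matters because TSFlags now carries bits above bit 31; a 32-bit parameter would truncate them before the mask is even applied. A minimal illustration (the flag position is hypothetical):

    #include <cassert>
    #include <cstdint>

    static const uint64_t SomeHighFlag = 1ULL << 35;  // hypothetical bit

    static bool hasFlag32(unsigned TSFlags) { return TSFlags & SomeHighFlag; }
    static bool hasFlag64(uint64_t TSFlags) { return TSFlags & SomeHighFlag; }

    int main() {
      assert(!hasFlag32(SomeHighFlag));  // bit 35 truncated at the call
      assert(hasFlag64(SomeHighFlag));   // survives a 64-bit parameter
      return 0;
    }
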
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b2e308248c5..4b7a0bf6fdb9 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -395,7 +395,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRm(insn))));
} else {
- assert(OpInfo[OpIdx].RegClass == 0 &&
+ assert(OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn)
@@ -531,7 +531,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == 0 &&
+ (OpInfo[1].RegClass < 0 &&
!OpInfo[1].isPredicate() &&
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
@@ -598,7 +598,7 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
assert(OpIdx < NumOps && "More operands expected");
- if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() &&
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
!OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0));
@@ -632,7 +632,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass == 0 &&
+ (OpInfo[2].RegClass < 0 &&
!OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
@@ -658,7 +658,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == 0 &&
+ (OpInfo[1].RegClass < 0 &&
!OpInfo[1].isPredicate() &&
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
@@ -685,7 +685,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass == 0 &&
+ (OpInfo[2].RegClass < 0 &&
!OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
@@ -761,7 +761,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
// Predicate operands are handled elsewhere.
if (NumOps == 2 &&
OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
- OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
+ OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
return true;
}
@@ -808,7 +808,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
+ (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
&& "Expect >=2 operands");
// Add the destination operand.
@@ -913,7 +913,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
- assert(NumOps == 3 && OpInfo[0].RegClass == 0 &&
+ assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
&& "Exactly 3 operands expected");
@@ -939,7 +939,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
- assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected");
+ assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
unsigned Imm11 = getT1Imm11(insn);
@@ -1239,7 +1239,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
&& OpInfo[0].RegClass == ARM::GPRRegClassID
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass == ARM::GPRRegClassID
- && OpInfo[3].RegClass == 0
+ && OpInfo[3].RegClass < 0
&& "Expect >= 4 operands and first 3 as reg operands");
// Add the <Rt> <Rt2> operands.
@@ -1322,8 +1322,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps == 4
&& OpInfo[0].RegClass == ARM::GPRRegClassID
&& OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass == 0
- && OpInfo[3].RegClass == 0
+ && OpInfo[2].RegClass < 0
+ && OpInfo[3].RegClass < 0
&& "Exactlt 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -1375,7 +1375,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
if (NumOps == OpIdx)
return true;
- if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()) {
if (Thumb2ShiftOpcode(Opcode))
@@ -1440,7 +1440,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
}
// The modified immediate operand should come next.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 &&
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1555,7 +1555,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
++OpIdx;
}
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1772,7 +1772,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
} else {
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
int Offset = 0;
@@ -1792,7 +1792,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
}
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 &&
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
@@ -1818,7 +1818,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
assert(NumOps >= 2 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == 0 &&
+ OpInfo[1].RegClass < 0 &&
"Expect >= 2 operands, first as reg, and second as imm operand");
// Build the register operand, followed by the (+/-)imm12 immediate.
@@ -1930,7 +1930,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
++OpIdx;
}
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1981,7 +1981,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeRm(insn))));
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Add the rotation amount immediate.
MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
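
The repeated "RegClass == 0" to "RegClass < 0" edits in this file track a
change of sentinel: operand-info tables now mark a non-register (pure
immediate) operand with a negative RegClass value, freeing class ID 0 to be a
real register class. A standalone restatement of the predicate these asserts
rely on (the struct is a toy stand-in for TargetOperandInfo):

    // Toy operand descriptor; only the fields the predicate needs.
    struct OperandInfoSketch {
      int RegClass;                // < 0 means "not a register operand"
      bool IsPredicate;
      bool IsOptionalDef;
    };

    static bool isPureImmOperand(const OperandInfoSketch &OpInfo) {
      return OpInfo.RegClass < 0 && !OpInfo.IsPredicate && !OpInfo.IsOptionalDef;
    }
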
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 0a4400cc7ddd..bbdd3c7f7c3e 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -105,8 +105,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
unsigned MOReg = MO.getReg();
Defs[MOReg] = MI;
- // Catch subregs as well.
- for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R)
+ // Catch aliases as well.
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
Defs[*R] = MI;
}
}
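
Switching from getSubRegisters to getAliasSet matters because a def of a D
register must also invalidate its super-register (the containing Q register),
which sub-register iteration never visits. A standalone model with invented
register numbers and an invented alias table:

    #include <map>

    enum Reg { None = 0, S0, S1, D0, Q0 };
    // D0 overlaps S0 and S1 (its sub-registers) and Q0 (its super-register);
    // the list is null-terminated like TargetRegisterInfo's tables.
    static const unsigned AliasSetD0[] = { S0, S1, Q0, None };

    int main() {
      std::map<unsigned, int> Defs;   // register -> id of defining instruction
      int DefID = 42;
      Defs[D0] = DefID;
      for (const unsigned *R = AliasSetD0; *R; ++R)
        Defs[*R] = DefID;             // Q0 is now correctly marked defined too
      return Defs.count(Q0) ? 0 : 1;
    }
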
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index a7258985e104..f67717cdd56f 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -407,7 +407,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
"expected a virtual register");
// Extracting from a Q or QQ register.
MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
- if (!DefMI || !DefMI->isExtractSubreg())
+ if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg())
return false;
VirtReg = DefMI->getOperand(1).getReg();
if (LastSrcReg && LastSrcReg != VirtReg)
@@ -418,7 +418,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
RC != ARM::QQPRRegisterClass &&
RC != ARM::QQQQPRRegisterClass)
return false;
- unsigned SubIdx = DefMI->getOperand(2).getImm();
+ unsigned SubIdx = DefMI->getOperand(1).getSubReg();
if (LastSubIdx) {
if (LastSubIdx != SubIdx-Stride)
return false;
@@ -434,22 +434,21 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
  // FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is
  // currently required for correctness. e.g.
- // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+ // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
// %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
// %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
// VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
- // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5
+ // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
// respectively.
// We need to change how we model uses of REG_SEQUENCE.
for (unsigned R = 0; R < NumRegs; ++R) {
MachineOperand &MO = MI->getOperand(FirstOpnd + R);
unsigned OldReg = MO.getReg();
MachineInstr *DefMI = MRI->getVRegDef(OldReg);
- assert(DefMI->isExtractSubreg());
+ assert(DefMI->isCopy());
MO.setReg(LastSrcReg);
MO.setSubReg(SubIds[R]);
- if (R != 0)
- MO.setIsKill(false);
+ MO.setIsKill(false);
// Delete the EXTRACT_SUBREG if its result is now dead.
if (MRI->use_empty(OldReg))
DefMI->eraseFromParent();
@@ -467,43 +466,9 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
unsigned FirstOpnd, NumRegs, Offset, Stride;
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- if (llvm::ModelWithRegSequence() &&
- FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
+ if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
-
- MachineBasicBlock::iterator NextI = llvm::next(MBBI);
- for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
-
- // For now, just assign a fixed set of adjacent registers.
- // This leaves plenty of room for future improvements.
- static const unsigned NEONDRegs[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7
- };
- MO.setReg(NEONDRegs[Offset + R * Stride]);
-
- if (MO.isUse()) {
- // Insert a copy from VirtReg.
- TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
- ARM::DPRRegisterClass, ARM::DPRRegisterClass,
- DebugLoc());
- if (MO.isKill()) {
- MachineInstr *CopyMI = prior(MBBI);
- CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
- }
- MO.setIsKill();
- } else if (MO.isDef() && !MO.isDead()) {
- // Add a copy to VirtReg.
- TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
- ARM::DPRRegisterClass, ARM::DPRRegisterClass,
- DebugLoc());
- }
- }
+ llvm_unreachable("expected a REG_SEQUENCE");
}
return Modified;
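
FormsRegSequence now recognizes a sub-register extract as a plain COPY whose
source operand carries a sub-register index, replacing the dedicated
EXTRACT_SUBREG opcode check. A standalone model of the new test, with toy
stand-ins for MachineInstr/MachineOperand:

    struct Operand { unsigned Reg; unsigned SubReg; }; // SubReg == 0: whole reg
    struct Instr   { bool IsCopy; Operand Ops[2]; };   // Ops[0]=dst, Ops[1]=src

    static bool isSubRegExtract(const Instr *DefMI,
                                unsigned &SrcReg, unsigned &SubIdx) {
      // The old code asked for an EXTRACT_SUBREG opcode here instead.
      if (!DefMI || !DefMI->IsCopy || DefMI->Ops[1].SubReg == 0)
        return false;
      SrcReg = DefMI->Ops[1].Reg;     // the wide Q/QQ register being sliced
      SubIdx = DefMI->Ops[1].SubReg;  // which D slice is extracted
      return true;
    }
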
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index fae84d4ee022..af630ac797c5 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -33,64 +33,24 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
- }
- }
-
- return false;
-}
-
-bool Thumb1InstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVgpr2gpr: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- return false;
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- return false;
- }
- return true;
- }
- }
-
- return false;
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+  assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+         "Thumb1 can only copy GPR registers");
+
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc));
}
void Thumb1InstrInfo::
@@ -175,10 +135,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
isKill = false;
}
- if (isKill) {
+ if (isKill)
MBB.addLiveIn(Reg);
- MIB.addReg(Reg, RegState::Kill);
- }
+
+ MIB.addReg(Reg, getKillRegState(isKill));
}
return true;
}
@@ -221,46 +181,3 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-
-MachineInstr *Thumb1InstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVgpr2gpr: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- break;
- NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- break;
- bool isDead = MI->getOperand(0).isDead();
- NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
- .addReg(DstReg,
- RegState::Define | getDeadRegState(isDead))
- .addFrameIndex(FI).addImm(0));
- }
- break;
- }
- }
-
- return NewMI;
-}
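
For reference, the opcode choice in the new Thumb1 copyPhysReg reduces to a
two-bit table over whether each side is a low (tGPR) register. A standalone
restatement, with the opcodes modeled as an enum since this sketch does not
link against LLVM:

    enum MovOpc { tMOVgpr2gpr, tMOVr, tMOVtgpr2gpr, tMOVgpr2tgpr };

    static MovOpc pickThumb1CopyOpc(bool tDest, bool tSrc) {
      if (tDest && tSrc) return tMOVr;         // low reg  -> low reg
      if (tSrc)          return tMOVtgpr2gpr;  // low reg  -> high reg
      if (tDest)         return tMOVgpr2tgpr;  // high reg -> low reg
      return tMOVgpr2gpr;                      // high reg -> high reg
    }
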
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index c937296bbee1..555135a8b76c 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -46,12 +46,10 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -64,20 +62,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
};
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 2f635fe50ad5..39b70b43b23f 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,21 +68,6 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-const TargetRegisterClass*
-Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const {
- if (isARMLowRegister(Reg))
- return ARM::tGPRRegisterClass;
- switch (Reg) {
- default:
- break;
- case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
- case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC:
- return ARM::GPRRegisterClass;
- }
-
- return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
-}
-
bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
@@ -410,6 +395,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// before that instead and adjust the UseMI.
bool done = false;
for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
// If this instruction affects R12, adjust our restore point.
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = II->getOperand(i);
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 4eca3673391f..9a0308afa20c 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,9 +38,6 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- const TargetRegisterClass *
- getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const;
-
bool hasReservedCallFrame(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -51,7 +48,8 @@ public:
// could not be handled directly in MI.
int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const;
+ unsigned MOVOpc, unsigned ADDriOpc,
+ unsigned SUBriOpc) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
new file mode 100644
index 000000000000..172908da228a
--- /dev/null
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
@@ -0,0 +1,53 @@
+//===-- Thumb2HazardRecognizer.cpp - Thumb2 post-RA hazard recognizer -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "Thumb2HazardRecognizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+using namespace llvm;
+
+ScheduleHazardRecognizer::HazardType
+Thumb2HazardRecognizer::getHazardType(SUnit *SU) {
+ if (ITBlockSize) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+ }
+
+ return PostRAHazardRecognizer::getHazardType(SU);
+}
+
+void Thumb2HazardRecognizer::Reset() {
+ ITBlockSize = 0;
+ PostRAHazardRecognizer::Reset();
+}
+
+void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ PostRAHazardRecognizer::EmitInstruction(SU);
+}
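
The ITBlockSize computation above relies on the IT mask encoding: the lowest
set bit of the 4-bit mask terminates the block, so the block length is 4 minus
the number of trailing zeros. A standalone check of that arithmetic (a
hand-rolled trailing-zero count stands in for CountTrailingZeros_32):

    #include <cassert>

    static unsigned countTrailingZeros4(unsigned Mask) {
      unsigned N = 0;
      while (N < 4 && !(Mask & (1u << N))) ++N;
      return N;
    }

    int main() {
      assert(4 - countTrailingZeros4(0x8) == 1); // IT    : one instruction
      assert(4 - countTrailingZeros4(0x4) == 2); // ITx   : two instructions
      assert(4 - countTrailingZeros4(0x2) == 3); // ITxy  : three
      assert(4 - countTrailingZeros4(0x1) == 4); // ITxyz : four
      return 0;
    }
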
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h
new file mode 100644
index 000000000000..472665862e41
--- /dev/null
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.h
@@ -0,0 +1,40 @@
+//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling Thumb2 functions on
+// ARM processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2HAZARDRECOGNIZER_H
+#define THUMB2HAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+ PostRAHazardRecognizer(ItinData) {}
+
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+};
+
+
+} // end namespace llvm
+
+#endif // THUMB2HAZARDRECOGNIZER_H
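
getHazardType keeps an IT block intact by stalling anything that is not the
next recorded member of the current block. A standalone model of that rule,
with toy types in place of SUnit/MachineInstr:

    enum Hazard { NoHazard, HazardFound };

    struct ITState {
      unsigned Size;       // members of the current IT block yet to issue
      const void *MIs[4];  // MIs[Size-1] is the next expected instruction
    };

    static Hazard getITHazard(const ITState &S, const void *MI) {
      if (S.Size && MI != S.MIs[S.Size - 1])
        return HazardFound; // out-of-order member or outsider: stall it
      return NoHazard;      // free to issue; fall back to itinerary checks
    }
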
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f36d4ef7567e..cd15bbed9f23 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -14,17 +14,23 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
- struct Thumb2ITBlockPass : public MachineFunctionPass {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -34,61 +40,167 @@ namespace {
}
private:
- bool InsertITBlocks(MachineBasicBlock &MBB);
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
}
-static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
- return ARMCC::AL;
- return llvm::getInstrPredicate(MI, PredReg);
+/// TrackDefUses - Tracking what registers are being defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
+ "Sub-register indices still around?");
+    // llvm models selects as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ }
+ return false;
}
-bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Modified = false;
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getPredicate(MI, PredReg);
-
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
}
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
// Insert an IT instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
.addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
++MBBI;
- // Finalize IT mask.
+ // Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
- while (MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) {
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
MachineInstr *NMI = &*MBBI;
MI = NMI;
- DebugLoc ndl = NMI->getDebugLoc();
+
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC)
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
- else
+ // Add implicit use of ITSTATE.
+        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
--Pos;
- ++MBBI;
}
+
+ // Finalize IT mask.
Mask |= (1 << Pos);
// Tag along (firstcond[0] << 4) with the mask.
Mask |= (CC & 1) << 4;
MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
Modified = true;
++NumITs;
}
@@ -100,17 +212,21 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
if (!AFI->isThumbFunction())
return false;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
MachineBasicBlock &MBB = *MFI;
- Modified |= InsertITBlocks(MBB);
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
}
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
return Modified;
}
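
The mask assembled in InsertITInstructions packs, from bit 3 downward, the low
condition bit of each subsequent instruction (matching the block's condition
gives a T slot, the opposite condition an E slot), then a terminating 1 bit,
with firstcond[0] tagged along at bit 4. A standalone model reproducing that
construction:

    #include <cassert>

    static unsigned buildITMask(unsigned FirstCondLowBit,
                                const unsigned *CondLowBits, unsigned N) {
      unsigned Mask = 0, Pos = 3;
      for (unsigned i = 1; i < N; ++i) {  // the first instruction is implicit
        Mask |= CondLowBits[i] << Pos;
        --Pos;
      }
      Mask |= 1u << Pos;                  // terminating bit ends the block
      Mask |= FirstCondLowBit << 4;       // firstcond[0] tagged along
      return Mask;
    }

    int main() {
      // Two-instruction block, second instruction on the opposite condition
      // (an "ITE" shape), firstcond low bit 0: mask bits are 0b01100.
      unsigned CondLowBits[] = { 0, 1 };
      assert(buildITMask(0, CondLowBits, 2) == 0xC);
      return 0;
    }
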
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 531d5e9f147e..ee517279c9d7 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,15 +17,27 @@
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
+#include "Thumb2HazardRecognizer.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
-#include "Thumb2InstrInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<unsigned>
+IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
+ cl::desc("Thumb2 if-conversion limit (default 3)"),
+ cl::init(3));
+
+static cl::opt<unsigned>
+IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
+ cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
+ cl::init(3));
+
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
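
Both limits are ordinary cl::opt flags, so they can be tuned when invoking
llc. A hypothetical invocation (only the two -thumb2-ifcvt-* flag names come
from this patch; the triple and file names are illustrative):

    llc -mtriple=thumbv7-apple-darwin -thumb2-ifcvt-limit=2 \
        -thumb2-ifcvt-diamond-limit=1 input.ll -o out.s
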
@@ -35,33 +47,99 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool
-Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
}
+
+  // Control flow can reach here if branch folding is run before the
+  // IT block formation pass.
}
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const {
+ return NumInstrs && NumInstrs <= IfCvtLimit;
+}
+
+bool Thumb2InstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF) const {
+ // FIXME: Catch optimization such as:
+ // r0 = movne
+ // r0 = moveq
+ return NumT && NumF &&
+    NumT <= IfCvtDiamondLimit && NumF <= IfCvtDiamondLimit;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
// Handle SPR, DPR, and QPR copies.
- return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
void Thumb2InstrInfo::
@@ -69,7 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -94,7 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -113,6 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
+ScheduleHazardRecognizer *Thumb2InstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+  return new Thumb2HazardRecognizer(II);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
@@ -131,14 +216,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// Use a movw to materialize the 16-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
.addImm(NumBytes)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
Fits = true;
} else if ((NumBytes & 0xffff) == 0) {
// Use a movt to materialize the 32-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
.addReg(DestReg)
.addImm(NumBytes >> 16)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
Fits = true;
}
@@ -502,3 +587,54 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+ SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+  // and UseMI, which can then form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
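
ReplaceTailWithBranchTo shortens an IT block by clearing the low mask bits and
planting a fresh terminating bit at position Count, where Count is the number
of surviving block members. A standalone check of that bit manipulation:

    #include <cassert>

    static unsigned truncateITMask(unsigned Mask, unsigned Count) {
      unsigned MaskOn  = 1u << Count;   // new terminating bit
      unsigned MaskOff = ~(MaskOn - 1); // clears the now-dead low mask bits
      return (Mask & MaskOff) | MaskOn;
    }

    int main() {
      // An ITTT block (mask 0b0001, four instructions) cut down to two
      // survivors becomes mask 0b0100:
      assert(truncateITMask(0x1, 2) == 0x4);
      return 0;
    }
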
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 29487700d19b..3a9f8b194d3c 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
#include "Thumb2RegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
Thumb2RegisterInfo RI;
@@ -31,12 +32,21 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
+ MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -50,12 +60,27 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+  /// two-address instruction inserted by the two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated" but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
}
#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8fe2e42a7cdd..ba392f36d946 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -451,11 +451,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
- const TargetInstrDesc &TID = MI->getDesc();
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1)
- return false;
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
return false;
if (Entry.Imm2Limit) {
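
ReduceTo2Addr now tries commuting before giving up: a 32-bit operation whose
destination matches the second source rather than the first can be swapped
into 2-address form, which the 16-bit encodings require. A standalone model of
the check (the real code goes through findCommutedOpIndices and
commuteInstruction; this toy just swaps fields):

    #include <utility>

    struct Add3 { unsigned Dst, Src1, Src2; }; // toy 3-operand instruction

    static bool makeTwoAddress(Add3 &MI) {
      if (MI.Dst == MI.Src1) return true;   // already in 2-address form
      if (MI.Dst != MI.Src2) return false;  // commuting would not help
      std::swap(MI.Src1, MI.Src2);          // commute the two sources
      return true;                          // now Dst == Src1
    }
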
@@ -484,6 +491,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool HasCC = false;
bool CCDead = false;
+ const TargetInstrDesc &TID = MI->getDesc();
if (TID.hasOptionalDef()) {
unsigned NumOps = TID.getNumOperands();
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
@@ -689,7 +697,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
goto ProcessNext;
}
- // Try to transform ro a 16-bit non-two-address instruction.
+ // Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
Modified = true;
MachineBasicBlock::iterator I = prior(NextMII);