path: root/lib/Target/ARM/ARMBaseInstrInfo.cpp
author     Dimitry Andric <dim@FreeBSD.org>    2012-08-15 19:34:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2012-08-15 19:34:23 +0000
commit     58b69754af0cbff56b1cfce9be9392e4451f6628 (patch)
tree       eacfc83d988e4b9d11114387ae7dc41243f2a363 /lib/Target/ARM/ARMBaseInstrInfo.cpp
parent     0378662f5bd3dbe8305a485b0282bceb8b52f465 (diff)
Vendor import of llvm trunk r161861 (tag: vendor/llvm/llvm-trunk-r161861)
Notes:
    svn path=/vendor/llvm/dist/; revision=239310
    svn path=/vendor/llvm/llvm-trunk-r161861/; revision=239311; tag=vendor/llvm/llvm-trunk-r161861
Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp  540
1 file changed, 368 insertions(+), 172 deletions(-)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c6280f819a4f..057fd718fdb5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -51,9 +51,9 @@ WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
- unsigned MLxOpc; // MLA / MLS opcode
- unsigned MulOpc; // Expanded multiplication opcode
- unsigned AddSubOpc; // Expanded add / sub opcode
+ uint16_t MLxOpc; // MLA / MLS opcode
+ uint16_t MulOpc; // Expanded multiplication opcode
+ uint16_t AddSubOpc; // Expanded add / sub opcode
bool NegAcc; // True if the acc is negated before the add / sub.
bool HasLane; // True if instruction has an extra "lane" operand.
};
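
The MLA/MLS table entries shrink from unsigned to uint16_t, presumably so the static MLx table packs tighter; ARM opcode values comfortably fit in 16 bits. A minimal standalone sketch of the size effect (hypothetical struct names, not LLVM code):

    #include <cstdint>

    struct MLxEntryWide {            // unsigned fields, as before the patch
      unsigned MLxOpc, MulOpc, AddSubOpc;
      bool NegAcc, HasLane;
    };

    struct MLxEntryNarrow {          // uint16_t fields, as after the patch
      uint16_t MLxOpc, MulOpc, AddSubOpc;
      bool NegAcc, HasLane;
    };

    static_assert(sizeof(MLxEntryNarrow) <= sizeof(MLxEntryWide),
                  "narrowing the opcode fields never grows the entry");
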
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
}
break;
case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
- case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
}
break;
case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -1531,11 +1575,11 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
- unsigned PseudoOpc;
- unsigned MachineOpc;
+ uint16_t PseudoOpc;
+ uint16_t MachineOpc;
};
-static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::ADDSri, ARM::ADDri},
{ARM::ADDSrr, ARM::ADDrr},
{ARM::ADDSrsi, ARM::ADDrsi},
@@ -1563,14 +1607,9 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
};
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
- static const int NPairs =
- sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
- for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
- *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) {
- if (OldOpc == OpcPair->PseudoOpc) {
- return OpcPair->MachineOpc;
- }
- }
+ for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
+ if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
+ return AddSubFlagsOpcodeMap[i].MachineOpc;
return 0;
}
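
convertAddSubFlagsOpcode now walks a const table by index using array_lengthof instead of pointer arithmetic over a hand-computed element count. The same lookup pattern in isolation (hypothetical table values; plain sizeof arithmetic stands in for array_lengthof so it builds outside LLVM):

    #include <cstdint>

    struct OpcodePair { uint16_t PseudoOpc, MachineOpc; };

    static const OpcodePair Map[] = {
      {100, 10},   // placeholder values standing in for ADDSri -> ADDri, etc.
      {101, 11},
    };

    unsigned convertOpcode(unsigned OldOpc) {
      for (unsigned i = 0, e = sizeof(Map) / sizeof(Map[0]); i != e; ++i)
        if (OldOpc == Map[i].PseudoOpc)
          return Map[i].MachineOpc;
      return 0; // not one of the pseudo opcodes
    }
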
@@ -1742,20 +1781,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
- int &CmpValue) const {
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+ case ARM::CMPrr:
+ case ARM::t2CMPrr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case ARM::TSTri:
case ARM::t2TSTri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = MI->getOperand(1).getImm();
CmpValue = 0;
return true;
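
analyzeCompare (renamed from AnalyzeCompare) now also reports a second source register for the register-register compares. A rough sketch of the result shapes it produces for the three handled forms, using plain data instead of MachineInstr (hypothetical types and function, for illustration only):

    struct CompareInfo {
      unsigned SrcReg;   // first register operand
      unsigned SrcReg2;  // second register operand, or 0 for immediate forms
      int CmpMask;       // ~0 unless this is a TST-style mask test
      int CmpValue;      // immediate compared against, or 0 for CMPrr/TST
    };

    enum class CmpKind { CMPri, CMPrr, TSTri };

    CompareInfo describeCompare(CmpKind Kind, unsigned R0, unsigned R1, int Imm) {
      switch (Kind) {
      case CmpKind::CMPri: return {R0, 0,  ~0,  Imm};
      case CmpKind::CMPrr: return {R0, R1, ~0,  0};
      case CmpKind::TSTri: return {R0, 0,  Imm, 0};
      }
      return {0, 0, 0, 0};
    }
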
@@ -1793,20 +1845,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
-bool ARMBaseInstrInfo::
-OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
- if (CmpValue != 0)
- return false;
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
- return false;
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
- MachineInstr *MI = &*DI;
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
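
getSwappedCondition, introduced in the hunk above, is the heart of the new CMP/SUB flag reuse: when the flags end up coming from SUB(b, a) instead of CMP(a, b), every consumer's condition code must be mirrored, and AL serves as the "cannot handle" sentinel. A standalone sketch with a mock enum (not the real ARMCC namespace):

    // Mock condition codes mirroring the ARMCC values used in the patch.
    enum CondCode { EQ, NE, HS, LO, HI, LS, GE, LT, GT, LE, AL };

    CondCode swapCondition(CondCode CC) {
      switch (CC) {
      case EQ: return EQ;   case NE: return NE;   // symmetric
      case HS: return LS;   case LO: return HI;   // unsigned compares mirror
      case HI: return LO;   case LS: return HS;
      case GE: return LE;   case LT: return GT;   // signed compares mirror
      case GT: return LT;   case LE: return GE;
      default: return AL;                         // V/C-sensitive or unknown
      }
    }

    // Example: "cmp r1, r2; bgt L" rewritten to reuse "subs r3, r2, r1"
    // must branch on LT instead of GT.
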
@@ -1825,32 +1924,49 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
}
- // Conservatively refuse to convert an instruction which isn't in the same BB
- // as the comparison.
- if (MI->getParent() != CmpInstr->getParent())
- return false;
-
- // Check that CPSR isn't set between the comparison instruction and the one we
- // want to change.
- MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
+ // There are two possible candidates which can be changed to set CPSR:
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
+ MachineInstr *Sub = NULL;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = NULL;
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ // Conservatively refuse to convert an instruction which isn't in the same
+ // BB as the comparison.
+ // For CMPri, we need to check Sub, thus we can't return here.
+ if (CmpInstr->getOpcode() == ARM::CMPri ||
+ CmpInstr->getOpcode() == ARM::t2CMPri)
+ MI = NULL;
+ else
+ return false;
+ }
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
- return false;
- if (!MO.isReg()) continue;
-
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
- if (MO.getReg() == ARM::CPSR)
- return false;
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ Sub = &*I;
+ break;
}
if (I == B)
@@ -1858,7 +1974,13 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
return false;
}
- // Set the "zero" bit in CPSR.
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
@@ -1894,13 +2016,17 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
case ARM::EORri:
case ARM::t2EORrr:
case ARM::t2EORri: {
- // Scan forward for the use of CPSR, if it's a conditional code requires
- // checking of V bit, then this is not safe to do. If we can't find the
- // CPSR use (i.e. used in another block), then it's not safe to perform
- // the optimization.
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code requires
+ // checking of V bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
- E = MI->getParent()->end();
+ E = CmpInstr->getParent()->end();
while (!isSafe && ++I != E) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands();
@@ -1918,28 +2044,56 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
// Condition code is after the operand before CPSR.
ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
- switch (CC) {
- default:
- isSafe = true;
- break;
- case ARMCC::VS:
- case ARMCC::VC:
- case ARMCC::GE:
- case ARMCC::LT:
- case ARMCC::GT:
- case ARMCC::LE:
- return false;
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
+ NewCC));
}
+ else
+ switch (CC) {
+ default:
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
}
}
- if (!isSafe)
- return false;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
+ }
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
CmpInstr->eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
return true;
}
}
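
Note how the rewrite of dependent condition codes is deferred: candidate (operand, new condition) pairs are collected in OperandsToUpdate while scanning forward, and only applied once CmpInstr is known to be removable. The pattern in isolation (plain ints standing in for MachineOperand; the XOR is just a placeholder for getSwappedCondition):

    #include <utility>
    #include <vector>

    bool rewriteIfSafe(std::vector<int> &CondCodes, bool Safe) {
      std::vector<std::pair<int *, int>> OperandsToUpdate;
      for (int &CC : CondCodes) {
        int NewCC = CC ^ 1;              // stand-in for the swapped condition
        OperandsToUpdate.push_back(std::make_pair(&CC, NewCC));
      }
      if (!Safe)
        return false;                    // nothing has been touched yet
      for (auto &Update : OperandsToUpdate)
        *Update.first = Update.second;   // commit all edits together
      return true;
    }
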
@@ -2071,9 +2225,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps)
- return UOps;
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0)
+ return ItinUOps;
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2088,7 +2242,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
//
// On Cortex-A8, each pair of register loads / stores can be scheduled on the
// same cycle. The scheduling for the first load / store must be done
- // separately by assuming the the address is not 64-bit aligned.
+ // separately by assuming the address is not 64-bit aligned.
//
// On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
// is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
@@ -2147,19 +2301,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- UOps = (NumRegs / 2);
+ int A8UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++UOps;
- return UOps;
+ ++A8UOps;
+ return A8UOps;
} else if (Subtarget.isCortexA9()) {
- UOps = (NumRegs / 2);
+ int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) ||
!MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
- ++UOps;
- return UOps;
+ ++A9UOps;
+ return A9UOps;
} else {
// Assume the worst.
return NumRegs;
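
The renamed A8UOps/A9UOps counts follow a small formula: registers issue in pairs, and an odd leftover register (plus, on Cortex-A9, a misaligned address) costs one more micro-op. A quick sketch with the values from the comments above (4 regs -> 2, 5 regs -> 3); hypothetical helper names:

    int numLoadStoreUOpsA8(int NumRegs) {
      return NumRegs / 2 + (NumRegs % 2);
    }

    int numLoadStoreUOpsA9(int NumRegs, bool Aligned64) {
      int UOps = NumRegs / 2;
      if ((NumRegs % 2) || !Aligned64)   // odd count or not 64-bit aligned:
        ++UOps;                          // one extra AGU cycle
      return UOps;
    }
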
@@ -2478,82 +2632,14 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
return II;
}
-int
-ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
- return 1;
-
- if (!ItinData || ItinData->isEmpty())
- return DefMI->mayLoad() ? 3 : 1;
-
- const MCInstrDesc *DefMCID = &DefMI->getDesc();
- const MCInstrDesc *UseMCID = &UseMI->getDesc();
- const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- unsigned Reg = DefMO.getReg();
- if (Reg == ARM::CPSR) {
- if (DefMI->getOpcode() == ARM::FMSTAT) {
- // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
- return Subtarget.isCortexA9() ? 1 : 20;
- }
-
- // CPSR set and branch can be paired in the same cycle.
- if (UseMI->isBranch())
- return 0;
-
- // Otherwise it takes the instruction latency (generally one).
- int Latency = getInstrLatency(ItinData, DefMI);
-
- // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
- // its uses. Instructions which are otherwise scheduled between them may
- // incur a code size penalty (not able to use the CPSR setting 16-bit
- // instructions).
- if (Latency > 0 && Subtarget.isThumb2()) {
- const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- --Latency;
- }
- return Latency;
- }
-
- unsigned DefAlign = DefMI->hasOneMemOperand()
- ? (*DefMI->memoperands_begin())->getAlignment() : 0;
- unsigned UseAlign = UseMI->hasOneMemOperand()
- ? (*UseMI->memoperands_begin())->getAlignment() : 0;
-
- unsigned DefAdj = 0;
- if (DefMI->isBundle()) {
- DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
- return 1;
- DefMCID = &DefMI->getDesc();
- }
- unsigned UseAdj = 0;
- if (UseMI->isBundle()) {
- unsigned NewUseIdx;
- const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
- Reg, NewUseIdx, UseAdj);
- if (NewUseMI) {
- UseMI = NewUseMI;
- UseIdx = NewUseIdx;
- UseMCID = &UseMI->getDesc();
- }
- }
-
- int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
- *UseMCID, UseIdx, UseAlign);
- int Adj = DefAdj + UseAdj;
- if (Adj) {
- Latency -= (int)(DefAdj + UseAdj);
- if (Latency < 1)
- return 1;
- }
-
- if (Latency > 1 &&
- (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+/// Return the number of cycles to add to (or subtract from) the static
+/// itinerary based on the def opcode and alignment. The caller will ensure that
+/// adjusted latency is at least one cycle.
+static int adjustDefLatency(const ARMSubtarget &Subtarget,
+ const MachineInstr *DefMI,
+ const MCInstrDesc *DefMCID, unsigned DefAlign) {
+ int Adjust = 0;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID->getOpcode()) {
@@ -2564,7 +2650,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
- --Latency;
+ --Adjust;
break;
}
case ARM::t2LDRs:
@@ -2574,13 +2660,13 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// Thumb2 mode: lsl only.
unsigned ShAmt = DefMI->getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 2)
- --Latency;
+ --Adjust;
break;
}
}
}
- if (DefAlign < 8 && Subtarget.isCortexA9())
+ if (DefAlign < 8 && Subtarget.isCortexA9()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -2689,10 +2775,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4LNq32_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increases by one.
- ++Latency;
+ ++Adjust;
break;
}
+ }
+ return Adjust;
+}
+
+
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ // No operand latency. The caller may fall back to getInstrLatency.
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ DefMCID = &DefMI->getDesc();
+ }
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef()) {
+ return 1;
+ }
+
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+
+ if (Reg == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMI->isBranch())
+ return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ unsigned Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
+ }
+
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ // Get the itinerary's latency if possible, and handle variable_ops.
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ // Unable to find operand latency. The caller may resort to getInstrLatency.
+ if (Latency < 0)
+ return Latency;
+
+ // Adjust for IT block position.
+ int Adj = DefAdj + UseAdj;
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ // Return the itinerary latency, which may be zero but not less than zero.
return Latency;
}
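
Both here and in getInstrLatency below, the itinerary latency is combined with a possibly negative adjustment, guarded by the "Adj >= 0 || (int)Latency > -Adj" test so the adjustment can never drive the result to zero or below; otherwise the unadjusted latency is kept. The rule in isolation (hypothetical helper name):

    unsigned adjustLatency(unsigned Latency, int Adj) {
      if (Adj >= 0 || (int)Latency > -Adj)
        return Latency + Adj;   // adjusted value stays strictly positive
      return Latency;           // adjustment would hit zero or go negative
    }

    // Examples: adjustLatency(3, -1) == 2, adjustLatency(1, -2) == 1
    // (unchanged), adjustLatency(2, 1) == 3.
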
@@ -2892,22 +3069,20 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
return 1;
// If the second MI is predicated, then there is an implicit use dependency.
- return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
- DepMI->getNumOperands());
+ return getInstrLatency(ItinData, DefMI);
}
-int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
if (MI->isCopyLike() || MI->isInsertSubreg() ||
MI->isRegSequence() || MI->isImplicitDef())
return 1;
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
+ // An instruction scheduler typically runs on unbundled instructions, however
+ // other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
- int Latency = 0;
+ unsigned Latency = 0;
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
@@ -2918,15 +3093,33 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &MCID = MI->getDesc();
- unsigned Class = MCID.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
- if (UOps)
- return ItinData->getStageLatency(Class);
- return getNumMicroOps(ItinData, MI);
+ }
+ // Be sure to call getStageLatency for an empty itinerary in case it has a
+ // valid MinLatency property.
+ if (!ItinData)
+ return MI->mayLoad() ? 3 : 1;
+
+ unsigned Class = MCID.getSchedClass();
+
+ // For instructions with variable uops, use uops as latency.
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
+ return getNumMicroOps(ItinData, MI);
+
+ // For the common case, fall back on the itinerary's latency.
+ unsigned Latency = ItinData->getStageLatency(Class);
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ unsigned DefAlign = MI->hasOneMemOperand()
+ ? (*MI->memoperands_begin())->getAlignment() : 0;
+ int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ return Latency;
}
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
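
The bundle branch of getInstrLatency accumulates a latency across the bundle members (the loop body is elided in this hunk). A guess at the accumulation pattern over a plain container, for orientation only:

    #include <vector>

    unsigned bundleLatency(const std::vector<unsigned> &MemberLatencies) {
      unsigned Latency = 0;
      for (unsigned L : MemberLatencies)  // one entry per instruction in the bundle
        Latency += L;
      return Latency;
    }
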
@@ -2960,7 +3153,10 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
+ /*FindMin=*/false);
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
if (Latency <= 3)
return false;
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||