Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
 contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 470 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 328 insertions(+), 142 deletions(-)
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 60050542716c..f835a4e5b5fe 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -36,7 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
using namespace llvm;
@@ -113,8 +114,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
- return (ScheduleHazardRecognizer *)
- new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -273,104 +273,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
+ TBB = 0;
+ FBB = 0;
+
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
- return false;
+ return false; // Empty blocks are easy.
--I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
- unsigned LastOpc = LastInst->getOpcode();
- // Check if it's an indirect branch first, this should return 'unanalyzable'
- // even if it's predicated.
- if (isIndirectBranchOpcode(LastOpc))
- return true;
+ // Walk backwards from the end of the basic block until the branch is
+ // analyzed or we give up.
+ while (isPredicated(I) || I->isTerminator()) {
- if (!isUnpredicatedTerminator(I))
- return false;
+ // Flag to be raised on unanalyzable instructions. This is useful in
+ // cases where we want to clean up at the end of the basic block before
+ // we bail out.
+ bool CantAnalyze = false;
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (isUncondBranchOpcode(LastOpc)) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
+ // Skip over DEBUG values and predicated nonterminators.
+ while (I->isDebugValue() || !I->isTerminator()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
}
- if (isCondBranchOpcode(LastOpc)) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(LastInst->getOperand(1));
- Cond.push_back(LastInst->getOperand(2));
- return false;
+
+ if (isIndirectBranchOpcode(I->getOpcode()) ||
+ isJumpTableBranchOpcode(I->getOpcode())) {
+ // Indirect branches and jump tables can't be analyzed, but we still want
+ // to clean up any instructions at the tail of the basic block.
+ CantAnalyze = true;
+ } else if (isUncondBranchOpcode(I->getOpcode())) {
+ TBB = I->getOperand(0).getMBB();
+ } else if (isCondBranchOpcode(I->getOpcode())) {
+ // Bail out if we encounter multiple conditional branches.
+ if (!Cond.empty())
+ return true;
+
+ assert(!FBB && "FBB should have been null.");
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(I->getOperand(1));
+ Cond.push_back(I->getOperand(2));
+ } else if (I->isReturn()) {
+ // Returns can't be analyzed, but we should run cleanup.
+ CantAnalyze = !isPredicated(I);
+ } else {
+ // We encountered an unrecognized terminator. Bail out immediately.
+ return true;
}
- return true; // Can't handle indirect branch.
- }
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
-
- // If AllowModify is true and the block ends with two or more unconditional
- // branches, delete all but the first unconditional branch.
- if (AllowModify && isUncondBranchOpcode(LastOpc)) {
- while (isUncondBranchOpcode(SecondLastOpc)) {
- LastInst->eraseFromParent();
- LastInst = SecondLastInst;
- LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- // Return now the only terminator is an unconditional branch.
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else {
- SecondLastInst = I;
- SecondLastOpc = SecondLastInst->getOpcode();
+ // Cleanup code - to be run for unpredicated unconditional branches and
+ // returns.
+ if (!isPredicated(I) &&
+ (isUncondBranchOpcode(I->getOpcode()) ||
+ isIndirectBranchOpcode(I->getOpcode()) ||
+ isJumpTableBranchOpcode(I->getOpcode()) ||
+ I->isReturn())) {
+ // Forget any previous conditional branch information; it no longer applies.
+ Cond.clear();
+ FBB = 0;
+
+ // If we can modify the function, delete everything below this
+ // unconditional branch.
+ if (AllowModify) {
+ MachineBasicBlock::iterator DI = llvm::next(I);
+ while (DI != MBB.end()) {
+ MachineInstr *InstToDelete = DI;
+ ++DI;
+ InstToDelete->eraseFromParent();
+ }
}
}
- }
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with a B and a Bcc, handle it.
- if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- Cond.push_back(SecondLastInst->getOperand(1));
- Cond.push_back(SecondLastInst->getOperand(2));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
+ if (CantAnalyze)
+ return true;
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
+ if (I == MBB.begin())
+ return false;
- // ...likewise if it ends with a branch table followed by an unconditional
- // branch. The branch folder can create these, and we must get rid of them for
- // correctness of Thumb constant islands.
- if ((isJumpTableBranchOpcode(SecondLastOpc) ||
- isIndirectBranchOpcode(SecondLastOpc)) &&
- isUncondBranchOpcode(LastOpc)) {
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return true;
+ --I;
}
- // Otherwise, can't handle this.
- return true;
+ // We made it past the terminators without bailing out - we must have
+ // analyzed this branch successfully.
+ return false;
}
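
For orientation, a minimal caller sketch (not part of this patch; the
driver logic and variable names are assumptions) showing how the outputs
of the rewritten AnalyzeBranch are typically consumed:

    // Hypothetical branch-folding style caller.
    MachineBasicBlock *TBB = 0, *FBB = 0;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/true)) {
      if (TBB && Cond.empty()) {
        // Block ends in an unconditional branch to TBB.
      } else if (TBB && !Cond.empty()) {
        // Conditional branch to TBB; FBB (if non-null) is the false side.
      }
    }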
@@ -535,11 +521,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
- if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
- ARMFunctionInfo *AFI =
- MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
- return AFI->isThumb2Function();
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+ if (AFI->isThumb2Function()) {
+ if (getSubtarget().restrictIT())
+ return isV8EligibleForIT(MI);
+ } else { // non-Thumb
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
}
+
return true;
}
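
The isV8EligibleForIT predicate comes from the newly included
ARMFeatures.h. As a rough sketch of the rule it enforces (the opcode set
below is illustrative only, not the real list), ARMv8's restricted-IT
mode deprecates IT blocks containing anything but certain narrow 16-bit
Thumb instructions:

    // Simplified illustration - the real check in ARMFeatures.h is far
    // more complete.
    static bool sketchIsV8EligibleForIT(const MachineInstr *MI) {
      switch (MI->getOpcode()) {
      default:
        return false; // Most encodings may no longer sit inside an IT block.
      case ARM::tMOVr:
      case ARM::tMOVi8:
      case ARM::tADDi8:
        return true;  // A few narrow data-processing forms remain eligible.
      }
    }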
@@ -660,16 +652,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
- bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ .addReg(SrcReg, getKillRegState(KillSrc))));
return;
}
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
- bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
if (SPRDest && SPRSrc)
@@ -698,26 +690,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
int Spacing = 1;
// Use VORRq when possible.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
- else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 2;
+ } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 4;
// Fall back to VMOVD.
- else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
- else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
- else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
- else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
-
- else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
- else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
- else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+ } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
+ BeginIdx = ARM::gsub_0;
+ SubRegs = 2;
+ } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ Spacing = 2;
+ } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ Spacing = 2;
+ } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ Spacing = 2;
+ }
assert(Opc && "Impossible reg-to-reg copy");
@@ -726,26 +739,28 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy register tuples backward when the first Dest reg overlaps with SrcReg.
if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
- BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
Spacing = -Spacing;
}
#ifndef NDEBUG
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
DstRegs.insert(Dst);
#endif
- Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src);
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
Mov = AddDefaultPred(Mov);
+ // MOVr can set CC.
+ if (Opc == ARM::MOVr)
+ Mov = AddDefaultCC(Mov);
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
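
A worked illustration of why the copy direction flips above (register
names chosen for illustration, not taken from the patch): with
DestReg = d1_d2 and SrcReg = d0_d1, the first destination sub-register
(d1) overlaps the source tuple, so a forward copy would clobber d1
before it is read:

    // Forward (wrong):          Backward (what the code emits):
    //   vmov d1, d0               vmov d2, d1   ; copy the tail first
    //   vmov d2, d1  ; stale!     vmov d1, d0   ; then the head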
@@ -1214,16 +1229,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
return true;
}
-MachineInstr*
-ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx, uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
- .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
- return &*MIB;
-}
-
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
@@ -1426,9 +1431,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1445,8 +1452,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1493,7 +1502,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
if ((Offset2 - Offset1) / 8 > 64)
return false;
- if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ // Check whether the machine opcodes are different. If they are, we
+ // consider the loads not to share a base address, EXCEPT for Thumb2
+ // byte loads where one is t2LDRBi8 and the other t2LDRBi12: those are
+ // treated as the same because they are merely different encoding forms
+ // of the same basic instruction.
+ if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
+ !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
+ (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.
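
For context on the exception above: t2LDRBi8 and t2LDRBi12 are two
immediate-offset encodings of the same Thumb2 LDRB operation, differing
only in the offset range they can express. A hypothetical helper (not in
the patch) states the equivalence more directly:

    static bool isSameByteLoadOpcode(unsigned A, unsigned B) {
      if (A == B)
        return true;
      // i8 and i12 are different immediate encodings of Thumb2 LDRB.
      return (A == ARM::t2LDRBi8 && B == ARM::t2LDRBi12) ||
             (A == ARM::t2LDRBi12 && B == ARM::t2LDRBi8);
    }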
@@ -1708,7 +1726,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
@@ -1716,11 +1734,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (!DefMI)
return 0;
+ // Find new register class to use.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return 0;
+
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- DefMI->getDesc(),
- MI->getOperand(0).getReg());
+ DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1743,7 +1767,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
- MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
@@ -1803,6 +1826,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ return;
+ }
+
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
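
The early-out above handles the degenerate NumBytes == 0 case with a
plain register move. Illustration (operands assumed):

    // emitARMRegPlusImmediate(MBB, MBBI, dl, r0, r1, /*NumBytes=*/0, ...)
    // now emits "mov r0, r1" rather than building "add r0, r1, #0".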
@@ -1826,6 +1857,115 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
+bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned NumBytes) {
+ // This optimisation potentially adds lots of load and store
+ // micro-operations, so it is really only a benefit to code size.
+ if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ return false;
+
+ // If only one register is pushed/popped, LLVM can use an LDR/STR
+ // instead. We can't modify those so make sure we're dealing with an
+ // instruction we understand.
+ bool IsPop = isPopOpcode(MI->getOpcode());
+ bool IsPush = isPushOpcode(MI->getOpcode());
+ if (!IsPush && !IsPop)
+ return false;
+
+ bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD;
+ bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
+ MI->getOpcode() == ARM::tPOP ||
+ MI->getOpcode() == ARM::tPOP_RET;
+
+ assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getOperand(1).getReg() == ARM::SP)) &&
+ "trying to fold sp update into non-sp-updating push/pop");
+
+ // The VFP push & pop act on D-registers, so we can only correctly fold
+ // an adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4
+ // bytes each. Don't try if this is violated.
+ if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
+ return false;
+
+ // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
+ // pred) so the list starts at 4. Thumb1 starts after the predicate.
+ int RegListIdx = IsT1PushPop ? 2 : 4;
+
+ // Calculate the space we'll need in terms of registers.
+ unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
+ unsigned RD0Reg, RegsNeeded;
+ if (IsVFPPushPop) {
+ RD0Reg = ARM::D0;
+ RegsNeeded = NumBytes / 8;
+ } else {
+ RD0Reg = ARM::R0;
+ RegsNeeded = NumBytes / 4;
+ }
+
+ // We're going to have to strip all list operands off before
+ // re-adding them since the order matters, so save the existing ones
+ // for later.
+ SmallVector<MachineOperand, 4> RegList;
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ RegList.push_back(MI->getOperand(i));
+
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
+ // Now try to find enough space in the reglist to allocate NumBytes.
+ for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
+ --CurReg) {
+ if (!IsPop) {
+ // Pushing any register is completely harmless; mark the register
+ // involved as undef since we don't care about its value in the
+ // slightest.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
+ false, false, true));
+ --RegsNeeded;
+ continue;
+ }
+
+ // However, we can only pop an extra register if it's not live. For
+ // registers live within the function we might clobber a return value
+ // register; the other way a register can be live here is if it's
+ // callee-saved.
+ if (isCalleeSavedRegister(CurReg, CSRegs) ||
+ MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
+ // VFP pops don't allow holes in the register list, so any skip is fatal
+ // for our transformation. GPR pops do, so we should just keep looking.
+ if (IsVFPPushPop)
+ return false;
+ else
+ continue;
+ }
+
+ // Mark the unimportant registers as <def,dead> in the POP.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+ true));
+ --RegsNeeded;
+ }
+
+ if (RegsNeeded > 0)
+ return false;
+
+ // Finally we know we can profitably perform the optimisation so go
+ // ahead: strip all existing registers off and add them back again
+ // in the right order.
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ MI->RemoveOperand(i);
+
+ // Add the complete list back in.
+ MachineInstrBuilder MIB(MF, &*MI);
+ for (int i = RegList.size() - 1; i >= 0; --i)
+ MIB.addOperand(RegList[i]);
+
+ return true;
+}
+
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
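
Regarding the tryFoldSPUpdateIntoPushPop function added above, a hedged
usage sketch (the caller and the byte count are invented for
illustration): under minsize, frame lowering can fold a small SP
adjustment into an adjacent push or pop instead of emitting a separate
instruction:

    //   push {r4, lr}      ; prologue push
    //   sub  sp, sp, #8    ; separate adjustment we would like to fold
    // On success the push becomes "push {r2, r3, r4, lr}" (the extra
    // registers are marked undef) and the sub is no longer needed:
    if (tryFoldSPUpdateIntoPushPop(MF, PushMI, /*NumBytes=*/8))
      SubMI->eraseFromParent();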
@@ -2232,8 +2372,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
isSafe = true;
break;
}
- // Condition code is after the operand before CPSR.
- ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
+
if (Sub) {
ARMCC::CondCodes NewCC = getSwappedCondition(CC);
if (NewCC == ARMCC::AL)
@@ -2244,11 +2408,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg)
- OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
- NewCC));
- }
- else
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
+ return false;
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
+ }
+ } else
switch (CC) {
default:
// CPSR can be used multiple times, we should continue.
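
A note on the VSEL bail-out above (illustration, not from the patch):
the condition is baked into the VSEL opcode rather than carried as an
operand, and only the EQ/GT/GE/VS variants exist, so a swapped condition
may have no encoding at all:

    // VSELGTD d0, d1, d2   ; d0 = GT ? d1 : d2
    // There is no LT/LE opcode to retarget this to; the selector would
    // have to swap the source operands instead - hence returning false.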
@@ -3604,6 +3771,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 0;
+
+ if (MI->isBundle())
+ return 0;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ // When predicated, CPSR is an additional source operand for CPSR-updating
+ // instructions; this apparently increases their latencies.
+ return 1;
+ }
+ return 0;
+}
+
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
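
Regarding getPredicationCost above, a brief sketch of how a consumer
might combine it with the base latency (the surrounding driver code is
an assumption, not from this patch):

    unsigned Latency = TII->getInstrLatency(ItinData, MI);
    if (TII->isPredicated(MI))
      Latency += TII->getPredicationCost(MI); // +1 for CPSR writers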
@@ -3685,8 +3870,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
- /*FindMin=*/false);
+ int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
if (Latency < 0)
Latency = getInstrLatency(ItinData, DefMI);
if (Latency <= 3)
@@ -4137,7 +4321,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
// FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
// the full D-register by loading the same value to both lanes. The
// instruction is micro-coded with 2 uops, so don't do this until we can
- // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // properly schedule micro-coded instructions. The dispatcher stalls cause
// too big regressions.
// Insert the dependency-breaking FCONSTD before MI.
@@ -4152,6 +4336,8 @@ bool ARMBaseInstrInfo::hasNOP() const {
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ if (MI->getNumOperands() < 4)
+ return true;
unsigned ShOpVal = MI->getOperand(3).getImm();
unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
// Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.