diff options
Diffstat (limited to 'llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 662 |
1 files changed, 621 insertions, 41 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 48f781510254..4cc2b6bf7e7e 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -495,6 +496,31 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; } +std::string ARMBaseInstrInfo::createMIROperandComment( + const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, + const TargetRegisterInfo *TRI) const { + + // First, let's see if there is a generic comment for this operand + std::string GenericComment = + TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); + if (!GenericComment.empty()) + return GenericComment; + + // If not, check if we have an immediate operand. + if (Op.getType() != MachineOperand::MO_Immediate) + return std::string(); + + // And print its corresponding condition code if the immediate is a + // predicate. + int FirstPredOp = MI.findFirstPredOperandIdx(); + if (FirstPredOp != (int) OpIdx) + return std::string(); + + std::string CC = "CC::"; + CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm()); + return CC; +} + bool ARMBaseInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { unsigned Opc = MI.getOpcode(); @@ -811,7 +837,7 @@ void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { } void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, - unsigned DestReg) { + Register DestReg) { addUnpredicatedMveVpredNOp(MIB); MIB.addReg(DestReg, RegState::Undef); } @@ -1009,6 +1035,36 @@ ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; } +Optional<ParamLoadedValue> +ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, + Register Reg) const { + if (auto DstSrcPair = isCopyInstrImpl(MI)) { + Register DstReg = DstSrcPair->Destination->getReg(); + + // TODO: We don't handle cases where the forwarding reg is narrower/wider + // than the copy registers. Consider for example: + // + // s16 = VMOVS s0 + // s17 = VMOVS s1 + // call @callee(d0) + // + // We'd like to describe the call site value of d0 as d8, but this requires + // gathering and merging the descriptions for the two VMOVS instructions. + // + // We also don't handle the reverse situation, where the forwarding reg is + // narrower than the copy destination: + // + // d8 = VMOVD d0 + // call @callee(s1) + // + // We need to produce a fragment description (the call site value of s1 is + // /not/ just d8). + if (DstReg != Reg) + return None; + } + return TargetInstrInfo::describeLoadedValue(MI, Reg); +} + const MachineInstrBuilder & ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, @@ -1023,16 +1079,16 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, void ARMBaseInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, + Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); + Align Alignment = MFI.getObjectAlign(FI); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, - MFI.getObjectSize(FI), Align); + MFI.getObjectSize(FI), Alignment); switch (TRI->getSpillSize(*RC)) { case 2: @@ -1102,7 +1158,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 16: if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { // Use aligned spills if the stack can be realigned. - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64)) .addFrameIndex(FI) .addImm(16) @@ -1130,7 +1186,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. - if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && Subtarget.hasNEON()) { BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo)) .addFrameIndex(FI) @@ -1153,7 +1209,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && Subtarget.hasNEON()) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. @@ -1264,17 +1320,17 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, + Register DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); + const Align Alignment = MFI.getObjectAlign(FI); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), Align); + MFI.getObjectSize(FI), Alignment); switch (TRI->getSpillSize(*RC)) { case 2: @@ -1343,7 +1399,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 16: if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) .addFrameIndex(FI) .addImm(16) @@ -1367,7 +1423,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) .addFrameIndex(FI) @@ -1390,7 +1446,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI) @@ -1682,13 +1738,13 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); - CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign()); return PCLabelId; } void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SubIdx, + Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const { unsigned Opcode = Orig.getOpcode(); @@ -1959,6 +2015,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, if (MI.isTerminator() || MI.isPosition()) return true; + // INLINEASM_BR can jump to another block + if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) + return true; + // Treat the start of the IT block as a scheduling boundary, but schedule // t2IT along with all instructions following it. // FIXME: This is a big hammer. But the alternative is to add all potential @@ -2120,7 +2180,7 @@ ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, /// condition, otherwise returns AL. It also returns the condition code /// register by reference. ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, - unsigned &PredReg) { + Register &PredReg) { int PIdx = MI.findFirstPredOperandIdx(); if (PIdx == -1) { PredReg = 0; @@ -2150,7 +2210,7 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, case ARM::MOVCCr: case ARM::t2MOVCCr: { // MOVCC can be commuted by inverting the condition. - unsigned PredReg = 0; + Register PredReg; ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) @@ -2171,9 +2231,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, /// Identify instructions that can be folded into a MOVCC instruction, and /// return the defining instruction. MachineInstr * -ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, +ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const { - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) return nullptr; @@ -2353,9 +2413,9 @@ unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const DebugLoc &dl, unsigned DestReg, - unsigned BaseReg, int NumBytes, - ARMCC::CondCodes Pred, unsigned PredReg, + const DebugLoc &dl, Register DestReg, + Register BaseReg, int NumBytes, + ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags) { if (NumBytes == 0 && DestReg != BaseReg) { @@ -2515,7 +2575,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, } bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, + Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MI.getDesc(); @@ -2671,8 +2731,8 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. -bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, +bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, + Register &SrcReg2, int &CmpMask, int &CmpValue) const { switch (MI.getOpcode()) { default: break; @@ -2708,7 +2768,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, /// operates on the given source register and applies the same mask /// as a 'tst' instruction. Provide a limited look-through for copies. /// When successful, MI will hold the found instruction. -static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, +static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, int CmpMask, bool CommonUse) { switch (MI->getOpcode()) { case ARM::ANDri: @@ -2743,7 +2803,7 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). /// This function can be extended later on. inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, - unsigned SrcReg, unsigned SrcReg2, + Register SrcReg, Register SrcReg2, int ImmValue, const MachineInstr *OI, bool &IsThumb1) { if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && @@ -2879,7 +2939,7 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the /// condition code of instructions which use the flags. bool ARMBaseInstrInfo::optimizeCompareInstr( - MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, + MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const { // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); @@ -3166,7 +3226,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { return true; MachineBasicBlock::const_iterator Next = &MI; ++Next; - unsigned SrcReg, SrcReg2; + Register SrcReg, SrcReg2; int CmpMask, CmpValue; bool IsThumb1; if (Next != MI.getParent()->end() && @@ -3177,7 +3237,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { } bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, - unsigned Reg, + Register Reg, MachineRegisterInfo *MRI) const { // Fold large immediates into add, sub, or, xor. unsigned DefOpc = DefMI.getOpcode(); @@ -3729,7 +3789,7 @@ unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. if ((NumRegs % 2) || !MI.hasOneMemOperand() || - (*MI.memoperands_begin())->getAlignment() < 8) + (*MI.memoperands_begin())->getAlign() < Align(8)) ++UOps; return UOps; } @@ -4316,10 +4376,10 @@ int ARMBaseInstrInfo::getOperandLatencyImpl( return -1; unsigned DefAlign = DefMI.hasOneMemOperand() - ? (*DefMI.memoperands_begin())->getAlignment() + ? (*DefMI.memoperands_begin())->getAlign().value() : 0; unsigned UseAlign = UseMI.hasOneMemOperand() - ? (*UseMI.memoperands_begin())->getAlignment() + ? (*UseMI.memoperands_begin())->getAlign().value() : 0; // Get the itinerary's latency if possible, and handle variable_ops. @@ -4366,10 +4426,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); auto *DefMN = cast<MachineSDNode>(DefNode); unsigned DefAlign = !DefMN->memoperands_empty() - ? (*DefMN->memoperands_begin())->getAlignment() : 0; + ? (*DefMN->memoperands_begin())->getAlign().value() + : 0; auto *UseMN = cast<MachineSDNode>(UseNode); unsigned UseAlign = !UseMN->memoperands_empty() - ? (*UseMN->memoperands_begin())->getAlignment() : 0; + ? (*UseMN->memoperands_begin())->getAlign().value() + : 0; int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); @@ -4660,7 +4722,7 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, // Adjust for dynamic def-side opcode variants not captured by the itinerary. unsigned DefAlign = - MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0; + MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0; int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); if (Adj >= 0 || (int)Latency > -Adj) { return Latency + Adj; @@ -4782,7 +4844,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant; MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( - MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); + MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4)); MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); } @@ -5353,7 +5415,8 @@ Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, // TODO: Handle cases where Reg is a super- or sub-register of the // destination register. - if (Reg != MI.getOperand(0).getReg()) + const MachineOperand &Op0 = MI.getOperand(0); + if (!Op0.isReg() || Reg != Op0.getReg()) return None; // We describe SUBri or ADDri instructions. @@ -5365,8 +5428,7 @@ Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, // TODO: Third operand can be global address (usually some string). Since // strings can be relocated we cannot calculate their offsets for // now. - if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || - !MI.getOperand(2).isImm()) + if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm()) return None; Offset = MI.getOperand(2).getImm() * Sign; @@ -5402,7 +5464,7 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) return nullptr; Register Reg = CmpMI->getOperand(0).getReg(); - unsigned PredReg = 0; + Register PredReg; ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg); if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0) return nullptr; @@ -5460,3 +5522,521 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); } + +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 4 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ + +enum MachineOutlinerClass { + MachineOutlinerTailCall, + MachineOutlinerThunk, + MachineOutlinerNoLRSave, + MachineOutlinerRegSave +}; + +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4, + UnsafeRegsDead = 0x8 +}; + +struct OutlinerCosts { + const int CallTailCall; + const int FrameTailCall; + const int CallThunk; + const int FrameThunk; + const int CallNoLRSave; + const int FrameNoLRSave; + const int CallRegSave; + const int FrameRegSave; + + OutlinerCosts(const ARMSubtarget &target) + : CallTailCall(target.isThumb() ? 4 : 4), + FrameTailCall(target.isThumb() ? 0 : 0), + CallThunk(target.isThumb() ? 4 : 4), + FrameThunk(target.isThumb() ? 0 : 0), + CallNoLRSave(target.isThumb() ? 4 : 4), + FrameNoLRSave(target.isThumb() ? 4 : 4), + CallRegSave(target.isThumb() ? 8 : 12), + FrameRegSave(target.isThumb() ? 2 : 4) {} +}; + +unsigned +ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + assert(C.LRUWasSet && "LRU wasn't set?"); + MachineFunction *MF = C.getMF(); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + BitVector regsReserved = ARI->getReservedRegs(*MF); + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : ARM::rGPRRegClass) { + if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && + Reg != ARM::LR && // LR is not reserved, but don't use it. + Reg != ARM::R12 && // R12 is not guaranteed to be preserved. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + +outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { + outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; + unsigned SequenceSize = + std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + // Properties about candidate MBBs that hold for all of them. + unsigned FlagsSetInAll = 0xF; + + // Compute liveness information for each candidate, and set FlagsSetInAll. + const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each( + RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); + + // According to the ARM Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Register R12(IP), + // * Condition codes (and thus the CPSR register) + // + // Since we control the instructions which are part of the outlined regions + // we don't need to be fully compliant with the AAPCS, but we have to + // guarantee that if a veneer is inserted at link time the code is still + // correct. Because of this, we can't outline any sequence of instructions + // where one of these registers is live into/across it. Thus, we need to + // delete those candidates. + auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { + // If the unsafe registers in this block are all dead, then we don't need + // to compute liveness here. + if (C.Flags & UnsafeRegsDead) + return false; + C.initLRU(TRI); + LiveRegUnits LRU = C.LRU; + return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + }; + + // Are there any candidates where those registers are live? + if (!(FlagsSetInAll & UnsafeRegsDead)) { + // Erase every candidate that violates the restrictions above. (It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violate the restructions.) + RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. + + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + + // Helper lambda which sets call information for every candidate. + auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + OutlinerCosts Costs(Subtarget); + unsigned FrameID = 0; + unsigned NumBytesToCreateFrame = 0; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (RepeatedSequenceLocs[0].back()->isTerminator()) { + FrameID = MachineOutlinerTailCall; + NumBytesToCreateFrame = Costs.FrameTailCall; + SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); + } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || + LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXi) { + FrameID = MachineOutlinerThunk; + NumBytesToCreateFrame = Costs.FrameThunk; + SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); + } else { + // We need to decide how to emit calls + frames. We can always emit the same + // frame if we don't need to save to the stack. + unsigned NumBytesNoStackCalls = 0; + std::vector<outliner::Candidate> CandidatesWithoutStackFixups; + + for (outliner::Candidate &C : RepeatedSequenceLocs) { + C.initLRU(TRI); + + // Is LR available? If so, we don't need a save. + if (C.LRU.available(ARM::LR)) { + FrameID = MachineOutlinerNoLRSave; + NumBytesNoStackCalls += Costs.CallNoLRSave; + C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave); + CandidatesWithoutStackFixups.push_back(C); + } + + // Is an unused register available? If so, we won't modify the stack, so + // we can outline with the same frame type as those that don't save LR. + else if (findRegisterToSaveLRTo(C)) { + FrameID = MachineOutlinerRegSave; + NumBytesNoStackCalls += Costs.CallRegSave; + C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave); + CandidatesWithoutStackFixups.push_back(C); + } + } + + if (!CandidatesWithoutStackFixups.empty()) { + RepeatedSequenceLocs = CandidatesWithoutStackFixups; + } else + return outliner::OutlinedFunction(); + } + + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, + NumBytesToCreateFrame, FrameID); +} + +bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); + + // Can F be deduplicated by the linker? If it can, don't outline from it. + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) + return false; + + // Don't outline from functions with section markings; the program could + // expect that all the code is in the named section. + // FIXME: Allow outlining from multiple functions with the same section + // marking. + if (F.hasSection()) + return false; + + // FIXME: Thumb1 outlining is not handled + if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) + return false; + + // It's safe to outline from MF. + return true; +} + +bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Check if LR is available through all of the MBB. If it's not, then set + // a flag. + assert(MBB.getParent()->getRegInfo().tracksLiveness() && + "Suitable Machine Function for outlining must track liveness"); + + LiveRegUnits LRU(getRegisterInfo()); + + std::for_each(MBB.rbegin(), MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + // Check if each of the unsafe registers are available... + bool R12AvailableInBlock = LRU.available(ARM::R12); + bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); + + // If all of these are dead (and not live out), we know we don't have to check + // them later. + if (R12AvailableInBlock && CPSRAvailableInBlock) + Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + + // Now, add the live outs to the set. + LRU.addLiveOuts(MBB); + + // If any of these registers is available in the MBB, but also a live out of + // the block, then we know outlining is unsafe. + if (R12AvailableInBlock && !LRU.available(ARM::R12)) + return false; + if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR)) + return false; + + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. + if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + if (!LRU.available(ARM::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return true; +} + +outliner::InstrType +ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // Be conservative with inline ASM + if (MI.isInlineAsm()) + return outliner::InstrType::Illegal; + + // Don't allow debug values to impact outlining type. + if (MI.isDebugInstr() || MI.isIndirectDebugValue()) + return outliner::InstrType::Invisible; + + // At this point, KILL or IMPLICIT_DEF instructions don't really tell us much + // so we can go ahead and skip over them. + if (MI.isKill() || MI.isImplicitDef()) + return outliner::InstrType::Invisible; + + // PIC instructions contain labels, outlining them would break offset + // computing. unsigned Opc = MI.getOpcode(); + unsigned Opc = MI.getOpcode(); + if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR || + Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR || + Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB || + Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic || + Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel || + Opc == ARM::t2MOV_ga_pcrel) + return outliner::InstrType::Illegal; + + // Be conservative with ARMv8.1 MVE instructions. + if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || + Opc == ARM::t2WhileLoopStart || Opc == ARM::t2LoopDec || + Opc == ARM::t2LoopEnd) + return outliner::InstrType::Illegal; + + const MCInstrDesc &MCID = MI.getDesc(); + uint64_t MIFlags = MCID.TSFlags; + if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE) + return outliner::InstrType::Illegal; + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + // Don't outline if the branch is not unconditional. + if (isPredicated(MI)) + return outliner::InstrType::Illegal; + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return outliner::InstrType::Legal; + + // It's not, so don't outline it. + return outliner::InstrType::Illegal; + } + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return outliner::InstrType::Illegal; + } + + // Don't outline if link register or program counter value are used. + if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI)) + return outliner::InstrType::Illegal; + + if (MI.isCall()) { + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. Explicitly list the call instructions we know about so + // we don't get unexpected results with call pseudo-instructions. + auto UnknownCallOutlineType = outliner::InstrType::Illegal; + if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || + Opc == ARM::tBLXr || Opc == ARM::tBLXi) + UnknownCallOutlineType = outliner::InstrType::LegalTerminator; + + return UnknownCallOutlineType; + } + + // Since calls are handled, don't touch LR or PC + if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI)) + return outliner::InstrType::Illegal; + + // Does this use the stack? + if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) { + // True if there is no chance that any outlined candidate from this range + // could require stack fixups. That is, both + // * LR is available in the range (No save/restore around call) + // * The range doesn't include calls (No save/restore in outlined frame) + // are true. + // FIXME: This is very restrictive; the flags check the whole block, + // not just the bit we will try to outline. + bool MightNeedStackFixUp = + (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | + MachineOutlinerMBBFlags::HasCalls)); + + if (!MightNeedStackFixUp) + return outliner::InstrType::Legal; + + return outliner::InstrType::Illegal; + } + + // Be conservative with IT blocks. + if (MI.readsRegister(ARM::ITSTATE, TRI) || + MI.modifiesRegister(ARM::ITSTATE, TRI)) + return outliner::InstrType::Illegal; + + // Don't outline positions. + if (MI.isPosition()) + return outliner::InstrType::Illegal; + + return outliner::InstrType::Legal; +} + +void ARMBaseInstrInfo::buildOutlinedFrame( + MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { + // Nothing is needed for tail-calls. + if (OF.FrameConstructionID == MachineOutlinerTailCall) + return; + + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call. + if (OF.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + bool isThumb = Subtarget.isThumb(); + unsigned FuncOp = isThumb ? 2 : 0; + unsigned Opc = Call->getOperand(FuncOp).isReg() + ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr + : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd + : ARM::tTAILJMPdND + : ARM::TAILJMPd; + MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc)) + .add(Call->getOperand(FuncOp)); + if (isThumb && !Call->getOperand(FuncOp).isReg()) + MIB.add(predOps(ARMCC::AL)); + Call->eraseFromParent(); + return; + } + + // Here we have to insert the return ourselves. Get the correct opcode from + // current feature set. + BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) + .add(predOps(ARMCC::AL)); +} + +MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( + Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, + MachineFunction &MF, const outliner::Candidate &C) const { + MachineInstrBuilder MIB; + MachineBasicBlock::iterator CallPt; + unsigned Opc; + bool isThumb = Subtarget.isThumb(); + + // Are we tail calling? + if (C.CallConstructionID == MachineOutlinerTailCall) { + // If yes, then we can just branch to the label. + Opc = isThumb + ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND + : ARM::TAILJMPd; + MIB = BuildMI(MF, DebugLoc(), get(Opc)) + .addGlobalAddress(M.getNamedValue(MF.getName())); + if (isThumb) + MIB.add(predOps(ARMCC::AL)); + It = MBB.insert(It, MIB); + return It; + } + + // Create the call instruction. + Opc = isThumb ? ARM::tBL : ARM::BL; + MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc)); + if (isThumb) + CallMIB.add(predOps(ARMCC::AL)); + CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. + if (!MBB.isLiveIn(ARM::LR)) + MBB.addLiveIn(ARM::LR); + copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); + CallPt = MBB.insert(It, CallMIB); + copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); + It--; + return CallPt; + } + // Insert the call. + It = MBB.insert(It, CallMIB); + return It; +} |