diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 122 |
1 files changed, 83 insertions, 39 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 3156bb446963..d975b8bd04fe 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -66,6 +67,10 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); +// Enable register renaming to find additional store pairing opportunities. +static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming", + cl::init(true), cl::Hidden); + #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" namespace { @@ -673,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && "Expected promotable zero stores."); - MachineBasicBlock::iterator NextI = I; - ++NextI; + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == MergeMI) - ++NextI; + NextI = next_nodbg(NextI, E); unsigned Opc = I->getOpcode(); bool IsScaled = !TII->isUnscaledLdSt(Opc); @@ -743,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function<bool(MachineInstr &, bool)> &Fn) { auto MBB = MI.getParent(); - for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(), - E = MBB->rend(); - I != E; I++) { + for (MachineInstr &I : + instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) { if (!Limit) return false; --Limit; - bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) { + bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) { return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() && TRI->regsOverlap(MOP.getReg(), DefReg); }); - if (!Fn(*I, isDef)) + if (!Fn(I, isDef)) return false; if (isDef) break; @@ -778,14 +782,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, const LdStPairFlags &Flags) { - MachineBasicBlock::iterator NextI = I; - ++NextI; + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == Paired) - ++NextI; + NextI = next_nodbg(NextI, E); int SExtIdx = Flags.getSExtIdx(); unsigned Opc = @@ -1004,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, MachineBasicBlock::iterator StoreI) { - MachineBasicBlock::iterator NextI = LoadI; - ++NextI; + MachineBasicBlock::iterator NextI = + next_nodbg(LoadI, LoadI->getParent()->end()); int LoadSize = TII->getMemScale(*LoadI); int StoreSize = TII->getMemScale(*StoreI); @@ -1140,24 +1144,11 @@ static int alignTo(int Num, int PowOf2) { return (Num + PowOf2 - 1) & ~(PowOf2 - 1); } -static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb, - AliasAnalysis *AA) { - // One of the instructions must modify memory. - if (!MIa.mayStore() && !MIb.mayStore()) - return false; - - // Both instructions must be memory operations. - if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore()) - return false; - - return MIa.mayAlias(AA, MIb, /*UseTBAA*/false); -} - static bool mayAlias(MachineInstr &MIa, SmallVectorImpl<MachineInstr *> &MemInsns, AliasAnalysis *AA) { for (MachineInstr *MIb : MemInsns) - if (mayAlias(MIa, *MIb, AA)) + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) return true; return false; @@ -1183,7 +1174,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( unsigned Count = 0; do { - --MBBI; + MBBI = prev_nodbg(MBBI, B); MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there @@ -1215,7 +1206,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( return false; // If we encounter a store aliased with the load, return early. - if (MI.mayStore() && mayAlias(LoadMI, MI, AA)) + if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false)) return false; } while (MBBI != B && Count < Limit); return false; @@ -1296,7 +1287,23 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n"); return false; } - auto canRenameMOP = [](const MachineOperand &MOP) { + auto canRenameMOP = [TRI](const MachineOperand &MOP) { + if (MOP.isReg()) { + auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg()); + // Renaming registers with multiple disjunct sub-registers (e.g. the + // result of a LD3) means that all sub-registers are renamed, potentially + // impacting other instructions we did not check. Bail out. + // Note that this relies on the structure of the AArch64 register file. In + // particular, a subregister cannot be written without overwriting the + // whole register. + if (RegClass->HasDisjunctSubRegs) { + LLVM_DEBUG( + dbgs() + << " Cannot rename operands with multiple disjunct subregisters (" + << MOP << ")\n"); + return false; + } + } return MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); }; @@ -1325,6 +1332,19 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, // For defs, check if we can rename the first def of RegToRename. if (FoundDef) { + // For some pseudo instructions, we might not generate code in the end + // (e.g. KILL) and we would end up without a correct def for the rename + // register. + // TODO: This might be overly conservative and we could handle those cases + // in multiple ways: + // 1. Insert an extra copy, to materialize the def. + // 2. Skip pseudo-defs until we find an non-pseudo def. + if (MI.isPseudo()) { + LLVM_DEBUG(dbgs() << " Cannot rename pseudo instruction " << MI + << "\n"); + return false; + } + for (auto &MOP : MI.operands()) { if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() || !TRI->regsOverlap(MOP.getReg(), RegToRename)) @@ -1422,7 +1442,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator MBBI = I; MachineBasicBlock::iterator MBBIWithRenameReg; MachineInstr &FirstMI = *I; - ++MBBI; + MBBI = next_nodbg(MBBI, E); bool MayLoad = FirstMI.mayLoad(); bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); @@ -1433,6 +1453,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); Optional<bool> MaybeCanRename = None; + if (!EnableRenaming) + MaybeCanRename = {false}; + SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses; LiveRegUnits UsedInBetween; UsedInBetween.init(*TRI); @@ -1447,7 +1470,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { + for (unsigned Count = 0; MBBI != E && Count < Limit; + MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; UsedInBetween.accumulate(MI); @@ -1616,12 +1640,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, assert((Update->getOpcode() == AArch64::ADDXri || Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); // Return the instruction following the merged instruction, which is // the instruction following our unmerged load. Unless that's the add/sub // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; + if (NextI == Update) + NextI = next_nodbg(NextI, E); int Value = Update->getOperand(2).getImm(); assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && @@ -1759,8 +1784,24 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( // insn (inclusive) and the second insn. ModifiedRegUnits.clear(); UsedRegUnits.clear(); - ++MBBI; - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { + MBBI = next_nodbg(MBBI, E); + + // We can't post-increment the stack pointer if any instruction between + // the memory access (I) and the increment (MBBI) can access the memory + // region defined by [SP, MBBI]. + const bool BaseRegSP = BaseReg == AArch64::SP; + if (BaseRegSP) { + // FIXME: For now, we always block the optimization over SP in windows + // targets as it requires to adjust the unwind/debug info, messing up + // the unwind info can actually cause a miscompile. + const MCAsmInfo *MAI = I->getMF()->getTarget().getMCAsmInfo(); + if (MAI->usesWindowsCFI() && + I->getMF()->getFunction().needsUnwindTableEntry()) + return E; + } + + for (unsigned Count = 0; MBBI != E && Count < Limit; + MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there @@ -1777,8 +1818,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( // Otherwise, if the base register is used or modified, we have no match, so // return early. + // If we are optimizing SP, do not allow instructions that may load or store + // in between the load and the optimized value update. if (!ModifiedRegUnits.available(BaseReg) || - !UsedRegUnits.available(BaseReg)) + !UsedRegUnits.available(BaseReg) || + (BaseRegSP && MBBI->mayLoadOrStore())) return E; } return E; @@ -1815,7 +1859,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( UsedRegUnits.clear(); unsigned Count = 0; do { - --MBBI; + MBBI = prev_nodbg(MBBI, B); MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there |