aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp122
1 files changed, 83 insertions, 39 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 3156bb446963..d975b8bd04fe 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -66,6 +67,10 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
+// Enable register renaming to find additional store pairing opportunities.
+static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
+ cl::init(true), cl::Hidden);
+
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
@@ -673,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
"Expected promotable zero stores.");
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == MergeMI)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
@@ -743,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
const TargetRegisterInfo *TRI, unsigned Limit,
std::function<bool(MachineInstr &, bool)> &Fn) {
auto MBB = MI.getParent();
- for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
- E = MBB->rend();
- I != E; I++) {
+ for (MachineInstr &I :
+ instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
if (!Limit)
return false;
--Limit;
- bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+ bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
TRI->regsOverlap(MOP.getReg(), DefReg);
});
- if (!Fn(*I, isDef))
+ if (!Fn(I, isDef))
return false;
if (isDef)
break;
@@ -778,14 +782,14 @@ MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
const LdStPairFlags &Flags) {
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == Paired)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
@@ -1004,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator StoreI) {
- MachineBasicBlock::iterator NextI = LoadI;
- ++NextI;
+ MachineBasicBlock::iterator NextI =
+ next_nodbg(LoadI, LoadI->getParent()->end());
int LoadSize = TII->getMemScale(*LoadI);
int StoreSize = TII->getMemScale(*StoreI);
@@ -1140,24 +1144,11 @@ static int alignTo(int Num, int PowOf2) {
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
-static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
- AliasAnalysis *AA) {
- // One of the instructions must modify memory.
- if (!MIa.mayStore() && !MIb.mayStore())
- return false;
-
- // Both instructions must be memory operations.
- if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
- return false;
-
- return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
-}
-
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
for (MachineInstr *MIb : MemInsns)
- if (mayAlias(MIa, *MIb, AA))
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
return true;
return false;
@@ -1183,7 +1174,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1215,7 +1206,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
return false;
// If we encounter a store aliased with the load, return early.
- if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
+ if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
return false;
} while (MBBI != B && Count < Limit);
return false;
@@ -1296,7 +1287,23 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
return false;
}
- auto canRenameMOP = [](const MachineOperand &MOP) {
+ auto canRenameMOP = [TRI](const MachineOperand &MOP) {
+ if (MOP.isReg()) {
+ auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
+ // Renaming registers with multiple disjunct sub-registers (e.g. the
+ // result of a LD3) means that all sub-registers are renamed, potentially
+ // impacting other instructions we did not check. Bail out.
+ // Note that this relies on the structure of the AArch64 register file. In
+ // particular, a subregister cannot be written without overwriting the
+ // whole register.
+ if (RegClass->HasDisjunctSubRegs) {
+ LLVM_DEBUG(
+ dbgs()
+ << " Cannot rename operands with multiple disjunct subregisters ("
+ << MOP << ")\n");
+ return false;
+ }
+ }
return MOP.isImplicit() ||
(MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
};
@@ -1325,6 +1332,19 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
// For defs, check if we can rename the first def of RegToRename.
if (FoundDef) {
+ // For some pseudo instructions, we might not generate code in the end
+ // (e.g. KILL) and we would end up without a correct def for the rename
+ // register.
+ // TODO: This might be overly conservative and we could handle those cases
+ // in multiple ways:
+ // 1. Insert an extra copy, to materialize the def.
+ // 2. Skip pseudo-defs until we find an non-pseudo def.
+ if (MI.isPseudo()) {
+ LLVM_DEBUG(dbgs() << " Cannot rename pseudo instruction " << MI
+ << "\n");
+ return false;
+ }
+
for (auto &MOP : MI.operands()) {
if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
!TRI->regsOverlap(MOP.getReg(), RegToRename))
@@ -1422,7 +1442,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MBBI = I;
MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
- ++MBBI;
+ MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1433,6 +1453,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
Optional<bool> MaybeCanRename = None;
+ if (!EnableRenaming)
+ MaybeCanRename = {false};
+
SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
LiveRegUnits UsedInBetween;
UsedInBetween.init(*TRI);
@@ -1447,7 +1470,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
UsedInBetween.accumulate(MI);
@@ -1616,12 +1640,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
- MachineBasicBlock::iterator NextI = I;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
- if (++NextI == Update)
- ++NextI;
+ if (NextI == Update)
+ NextI = next_nodbg(NextI, E);
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1759,8 +1784,24 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
- ++MBBI;
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ MBBI = next_nodbg(MBBI, E);
+
+ // We can't post-increment the stack pointer if any instruction between
+ // the memory access (I) and the increment (MBBI) can access the memory
+ // region defined by [SP, MBBI].
+ const bool BaseRegSP = BaseReg == AArch64::SP;
+ if (BaseRegSP) {
+ // FIXME: For now, we always block the optimization over SP in windows
+ // targets as it requires to adjust the unwind/debug info, messing up
+ // the unwind info can actually cause a miscompile.
+ const MCAsmInfo *MAI = I->getMF()->getTarget().getMCAsmInfo();
+ if (MAI->usesWindowsCFI() &&
+ I->getMF()->getFunction().needsUnwindTableEntry())
+ return E;
+ }
+
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1777,8 +1818,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// Otherwise, if the base register is used or modified, we have no match, so
// return early.
+ // If we are optimizing SP, do not allow instructions that may load or store
+ // in between the load and the optimized value update.
if (!ModifiedRegUnits.available(BaseReg) ||
- !UsedRegUnits.available(BaseReg))
+ !UsedRegUnits.available(BaseReg) ||
+ (BaseRegSP && MBBI->mayLoadOrStore()))
return E;
}
return E;
@@ -1815,7 +1859,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there