path: root/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 1050
1 file changed, 592 insertions, 458 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 43664df3b861..dca13fc49414 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -33,9 +33,6 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-ldst-opt"
-/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine
-/// load / store instructions to form ldp / stp instructions.
-
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
@@ -45,9 +42,19 @@ STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
-static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
+// The LdStLimit limits how far we search for load/store pairs.
+static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
cl::init(20), cl::Hidden);
+// The UpdateLimit limits how far we search for update instructions when we form
+// pre-/post-index instructions.
+static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
+ cl::Hidden);
+
+static cl::opt<bool> EnableNarrowLdMerge("enable-narrow-ld-merge", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable narrow load merge"));
+
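// For illustration (assumed usage, not shown in this patch): the hidden
// cl::opt flags above can be overridden when invoking llc, e.g.
//   llc -mtriple=aarch64 -aarch64-load-store-scan-limit=32 \
//       -aarch64-update-scan-limit=64 -enable-narrow-ld-merge input.ll
// where the flag names come from the declarations above and the input file
// name is only a placeholder.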
namespace llvm {
void initializeAArch64LoadStoreOptPass(PassRegistry &);
}
@@ -88,22 +95,29 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
const AArch64Subtarget *Subtarget;
+ // Track which registers have been modified and used.
+ BitVector ModifiedRegs, UsedRegs;
+
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
LdStPairFlags &Flags,
- unsigned Limit);
+ unsigned Limit,
+ bool FindNarrowMerge);
// Scan the instructions looking for a store that writes to the address from
// which the current load instruction reads. Return true if one is found.
bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
MachineBasicBlock::iterator &StoreI);
+ // Merge the two instructions indicated into a wider instruction.
+ MachineBasicBlock::iterator
+ mergeNarrowInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags);
+
// Merge the two instructions indicated into a single pair-wise instruction.
- // If MergeForward is true, erase the first instruction and fold its
- // operation into the second. If false, the reverse. Return the instruction
- // following the first instruction (which may change during processing).
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -118,8 +132,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan forwards.
MachineBasicBlock::iterator
- findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
- int UnscaledOffset);
+ findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
+ int UnscaledOffset, unsigned Limit);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -129,7 +143,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find an instruction that updates the base register of the ld/st
// instruction.
- bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI,
+ bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
unsigned BaseReg, int Offset);
// Merge a pre- or post-index base register update into a ld/st instruction.
@@ -140,17 +154,21 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge foldable ldr/str instructions.
bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
+ // Find and pair ldr/str instructions.
+ bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
- // Check if converting two narrow loads into a single wider load with
- // bitfield extracts could be enabled.
- bool enableNarrowLdMerge(MachineFunction &Fn);
-
bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return AARCH64_LOAD_STORE_OPT_NAME;
}
@@ -161,37 +179,8 @@ char AArch64LoadStoreOpt::ID = 0;
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
-static bool isUnscaledLdSt(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURBBi:
- case AArch64::STURHHi:
- case AArch64::STURWi:
- case AArch64::STURXi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- case AArch64::LDURSWi:
- case AArch64::LDURHHi:
- case AArch64::LDURBBi:
- case AArch64::LDURSBWi:
- case AArch64::LDURSHWi:
- return true;
- }
-}
-
-static bool isUnscaledLdSt(MachineInstr *MI) {
- return isUnscaledLdSt(MI->getOpcode());
-}
-
-static unsigned getBitExtrOpcode(MachineInstr *MI) {
- switch (MI->getOpcode()) {
+static unsigned getBitExtrOpcode(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode.");
case AArch64::LDRBBui:
@@ -219,10 +208,6 @@ static bool isNarrowStore(unsigned Opc) {
}
}
-static bool isNarrowStore(MachineInstr *MI) {
- return isNarrowStore(MI->getOpcode());
-}
-
static bool isNarrowLoad(unsigned Opc) {
switch (Opc) {
default:
@@ -239,13 +224,17 @@ static bool isNarrowLoad(unsigned Opc) {
}
}
-static bool isNarrowLoad(MachineInstr *MI) {
- return isNarrowLoad(MI->getOpcode());
+static bool isNarrowLoad(MachineInstr &MI) {
+ return isNarrowLoad(MI.getOpcode());
+}
+
+static bool isNarrowLoadOrStore(unsigned Opc) {
+ return isNarrowLoad(Opc) || isNarrowStore(Opc);
}
// Scaling factor for unscaled load or store.
-static int getMemScale(MachineInstr *MI) {
- switch (MI->getOpcode()) {
+static int getMemScale(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Opcode has unknown scale!");
case AArch64::LDRBBui:
@@ -354,6 +343,37 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
}
}
+static unsigned getMatchingWideOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Opcode has no wide equivalent!");
+ case AArch64::STRBBui:
+ return AArch64::STRHHui;
+ case AArch64::STRHHui:
+ return AArch64::STRWui;
+ case AArch64::STURBBi:
+ return AArch64::STURHHi;
+ case AArch64::STURHHi:
+ return AArch64::STURWi;
+ case AArch64::STURWi:
+ return AArch64::STURXi;
+ case AArch64::STRWui:
+ return AArch64::STRXui;
+ case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHWi:
+ return AArch64::LDURWi;
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ return AArch64::LDURHHi;
+ }
+}
+
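// For illustration (assumed example, not from the patch itself): the widening
// map above is what allows two adjacent zero stores to collapse into one.
// Since getMatchingWideOpcode(STRWui) is STRXui and the merged store uses XZR,
//   str wzr, [x0]
//   str wzr, [x0, #4]
// can become
//   str xzr, [x0]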
static unsigned getMatchingPairOpcode(unsigned Opc) {
switch (Opc) {
default:
@@ -367,14 +387,6 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::STRQui:
case AArch64::STURQi:
return AArch64::STPQi;
- case AArch64::STRBBui:
- return AArch64::STRHHui;
- case AArch64::STRHHui:
- return AArch64::STRWui;
- case AArch64::STURBBi:
- return AArch64::STURHHi;
- case AArch64::STURHHi:
- return AArch64::STURWi;
case AArch64::STRWui:
case AArch64::STURWi:
return AArch64::STPWi;
@@ -399,25 +411,13 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return AArch64::LDPSWi;
- case AArch64::LDRHHui:
- case AArch64::LDRSHWui:
- return AArch64::LDRWui;
- case AArch64::LDURHHi:
- case AArch64::LDURSHWi:
- return AArch64::LDURWi;
- case AArch64::LDRBBui:
- case AArch64::LDRSBWui:
- return AArch64::LDRHHui;
- case AArch64::LDURBBi:
- case AArch64::LDURSBWi:
- return AArch64::LDURHHi;
}
}
-static unsigned isMatchingStore(MachineInstr *LoadInst,
- MachineInstr *StoreInst) {
- unsigned LdOpc = LoadInst->getOpcode();
- unsigned StOpc = StoreInst->getOpcode();
+static unsigned isMatchingStore(MachineInstr &LoadInst,
+ MachineInstr &StoreInst) {
+ unsigned LdOpc = LoadInst.getOpcode();
+ unsigned StOpc = StoreInst.getOpcode();
switch (LdOpc) {
default:
llvm_unreachable("Unsupported load instruction!");
@@ -562,8 +562,8 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
}
}
-static bool isPairedLdSt(const MachineInstr *MI) {
- switch (MI->getOpcode()) {
+static bool isPairedLdSt(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDPSi:
@@ -581,41 +581,55 @@ static bool isPairedLdSt(const MachineInstr *MI) {
}
}
-static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
+static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
- return MI->getOperand(Idx);
+ return MI.getOperand(Idx);
}
-static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
+static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
- return MI->getOperand(Idx);
+ return MI.getOperand(Idx);
}
-static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
+static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
- return MI->getOperand(Idx);
+ return MI.getOperand(Idx);
}
-static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
- MachineInstr *StoreInst) {
+static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
+ MachineInstr &StoreInst,
+ const AArch64InstrInfo *TII) {
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
int LoadSize = getMemScale(LoadInst);
int StoreSize = getMemScale(StoreInst);
- int UnscaledStOffset = isUnscaledLdSt(StoreInst)
+ int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
? getLdStOffsetOp(StoreInst).getImm()
: getLdStOffsetOp(StoreInst).getImm() * StoreSize;
- int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
+ int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst)
? getLdStOffsetOp(LoadInst).getImm()
: getLdStOffsetOp(LoadInst).getImm() * LoadSize;
return (UnscaledStOffset <= UnscaledLdOffset) &&
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
+static bool isPromotableZeroStoreOpcode(unsigned Opc) {
+ return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
+}
+
+static bool isPromotableZeroStoreOpcode(MachineInstr &MI) {
+ return isPromotableZeroStoreOpcode(MI.getOpcode());
+}
+
+static bool isPromotableZeroStoreInst(MachineInstr &MI) {
+ return (isPromotableZeroStoreOpcode(MI)) &&
+ getLdStRegOp(MI).getReg() == AArch64::WZR;
+}
+
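// For illustration (assumed example, not from the patch itself): under this
// predicate "strh wzr, [x0]" and "str wzr, [x0]" are promotable zero stores,
// while "strh w1, [x0]" is not, because only a WZR source qualifies.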
MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
+AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
const LdStPairFlags &Flags) {
MachineBasicBlock::iterator NextI = I;
++NextI;
@@ -623,128 +637,124 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
- if (NextI == Paired)
+ if (NextI == MergeMI)
++NextI;
- int SExtIdx = Flags.getSExtIdx();
- unsigned Opc =
- SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
- bool IsUnscaled = isUnscaledLdSt(Opc);
- int OffsetStride = IsUnscaled ? getMemScale(I) : 1;
+ unsigned Opc = I->getOpcode();
+ bool IsScaled = !TII->isUnscaledLdSt(Opc);
+ int OffsetStride = IsScaled ? 1 : getMemScale(*I);
bool MergeForward = Flags.getMergeForward();
- unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
- MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
- MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);
+ MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
- if (getLdStOffsetOp(I).getImm() ==
- getLdStOffsetOp(Paired).getImm() + OffsetStride) {
- RtMI = Paired;
- Rt2MI = I;
- // Here we swapped the assumption made for SExtIdx.
- // I.e., we turn ldp I, Paired into ldp Paired, I.
- // Update the index accordingly.
- if (SExtIdx != -1)
- SExtIdx = (SExtIdx + 1) % 2;
+ if (getLdStOffsetOp(*I).getImm() ==
+ getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) {
+ RtMI = &*MergeMI;
+ Rt2MI = &*I;
} else {
- RtMI = I;
- Rt2MI = Paired;
+ RtMI = &*I;
+ Rt2MI = &*MergeMI;
}
- int OffsetImm = getLdStOffsetOp(RtMI).getImm();
+ int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
+ // Change the scaled offset from small to large type.
+ if (IsScaled) {
+ assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+ OffsetImm /= 2;
+ }
+ DebugLoc DL = I->getDebugLoc();
+ MachineBasicBlock *MBB = I->getParent();
if (isNarrowLoad(Opc)) {
- // Change the scaled offset from small to large type.
- if (!IsUnscaled) {
- assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
- OffsetImm /= 2;
- }
- MachineInstr *RtNewDest = MergeForward ? I : Paired;
+ MachineInstr *RtNewDest = &*(MergeForward ? I : MergeMI);
// When merging small (< 32 bit) loads for big-endian targets, the order of
// the component parts gets swapped.
if (!Subtarget->isLittleEndian())
std::swap(RtMI, Rt2MI);
// Construct the new load instruction.
MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
- NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(NewOpc))
- .addOperand(getLdStRegOp(RtNewDest))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*Paired));
+ NewMemMI =
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
+ .addOperand(getLdStRegOp(*RtNewDest))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*MergeMI));
+ (void)NewMemMI;
DEBUG(
dbgs()
<< "Creating the new load and extract. Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
- DEBUG(Paired->print(dbgs()));
+ DEBUG(MergeMI->print(dbgs()));
DEBUG(dbgs() << " with instructions:\n ");
DEBUG((NewMemMI)->print(dbgs()));
- int Width = getMemScale(I) == 1 ? 8 : 16;
+ int Width = getMemScale(*I) == 1 ? 8 : 16;
int LSBLow = 0;
int LSBHigh = Width;
int ImmsLow = LSBLow + Width - 1;
int ImmsHigh = LSBHigh + Width - 1;
- MachineInstr *ExtDestMI = MergeForward ? Paired : I;
+ MachineInstr *ExtDestMI = &*(MergeForward ? MergeMI : I);
if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
// Create the bitfield extract for high bits.
- BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(getBitExtrOpcode(Rt2MI)))
- .addOperand(getLdStRegOp(Rt2MI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
+ BitExtMI1 =
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
+ .addOperand(getLdStRegOp(*Rt2MI))
+ .addReg(getLdStRegOp(*RtNewDest).getReg())
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
// Create the bitfield extract for low bits.
if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
// For unsigned, prefer to use AND for low bits.
- BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(RtMI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
+ BitExtMI2 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(*RtMI))
+ .addReg(getLdStRegOp(*RtNewDest).getReg())
.addImm(ImmsLow);
} else {
- BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(getBitExtrOpcode(RtMI)))
- .addOperand(getLdStRegOp(RtMI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
+ BitExtMI2 =
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
+ .addOperand(getLdStRegOp(*RtMI))
+ .addReg(getLdStRegOp(*RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
}
} else {
// Create the bitfield extract for low bits.
if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
// For unsigned, prefer to use AND for low bits.
- BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(RtMI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
+ BitExtMI1 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(*RtMI))
+ .addReg(getLdStRegOp(*RtNewDest).getReg())
.addImm(ImmsLow);
} else {
- BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(getBitExtrOpcode(RtMI)))
- .addOperand(getLdStRegOp(RtMI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
+ BitExtMI1 =
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
+ .addOperand(getLdStRegOp(*RtMI))
+ .addReg(getLdStRegOp(*RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
}
// Create the bitfield extract for high bits.
- BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(getBitExtrOpcode(Rt2MI)))
- .addOperand(getLdStRegOp(Rt2MI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
+ BitExtMI2 =
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
+ .addOperand(getLdStRegOp(*Rt2MI))
+ .addReg(getLdStRegOp(*RtNewDest).getReg())
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
}
+ (void)BitExtMI1;
+ (void)BitExtMI2;
+
DEBUG(dbgs() << " ");
DEBUG((BitExtMI1)->print(dbgs()));
DEBUG(dbgs() << " ");
@@ -753,47 +763,122 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// Erase the old instructions.
I->eraseFromParent();
- Paired->eraseFromParent();
+ MergeMI->eraseFromParent();
return NextI;
}
+ assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
+ "Expected promotable zero store");
// Construct the new instruction.
MachineInstrBuilder MIB;
- if (isNarrowStore(Opc)) {
- // Change the scaled offset from small to large type.
- if (!IsUnscaled) {
- assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
- OffsetImm /= 2;
+ MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
+ .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*MergeMI));
+ (void)MIB;
+
+ DEBUG(dbgs() << "Creating wider load/store. Replacing instructions:\n ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(MergeMI->print(dbgs()));
+ DEBUG(dbgs() << " with instruction:\n ");
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+
+ // Erase the old instructions.
+ I->eraseFromParent();
+ MergeMI->eraseFromParent();
+ return NextI;
+}
+
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ const LdStPairFlags &Flags) {
+ MachineBasicBlock::iterator NextI = I;
+ ++NextI;
+ // If NextI is the second of the two instructions to be merged, we need
+ // to skip one further. Either way we merge will invalidate the iterator,
+ // and we don't need to scan the new instruction, as it's a pairwise
+ // instruction, which we're not considering for further action anyway.
+ if (NextI == Paired)
+ ++NextI;
+
+ int SExtIdx = Flags.getSExtIdx();
+ unsigned Opc =
+ SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
+ bool IsUnscaled = TII->isUnscaledLdSt(Opc);
+ int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
+
+ bool MergeForward = Flags.getMergeForward();
+ // Insert our new paired instruction after whichever of the paired
+ // instructions MergeForward indicates.
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ // Also based on MergeForward is from where we copy the base register operand
+ // so we get the flags compatible with the input code.
+ const MachineOperand &BaseRegOp =
+ MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
+
+ int Offset = getLdStOffsetOp(*I).getImm();
+ int PairedOffset = getLdStOffsetOp(*Paired).getImm();
+ bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
+ if (IsUnscaled != PairedIsUnscaled) {
+ // We're trying to pair instructions that differ in how they are scaled. If
+ // I is scaled then scale the offset of Paired accordingly. Otherwise, do
+ // the opposite (i.e., make Paired's offset unscaled).
+ int MemSize = getMemScale(*Paired);
+ if (PairedIsUnscaled) {
+ // If the unscaled offset isn't a multiple of the MemSize, we can't
+ // pair the operations together.
+ assert(!(PairedOffset % getMemScale(*Paired)) &&
+ "Offset should be a multiple of the stride!");
+ PairedOffset /= MemSize;
+ } else {
+ PairedOffset *= MemSize;
}
- MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(NewOpc))
- .addOperand(getLdStRegOp(I))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*Paired));
+ }
+
+ // Which register is Rt and which is Rt2 depends on the offset order.
+ MachineInstr *RtMI, *Rt2MI;
+ if (Offset == PairedOffset + OffsetStride) {
+ RtMI = &*Paired;
+ Rt2MI = &*I;
+ // Here we swapped the assumption made for SExtIdx.
+ // I.e., we turn ldp I, Paired into ldp Paired, I.
+ // Update the index accordingly.
+ if (SExtIdx != -1)
+ SExtIdx = (SExtIdx + 1) % 2;
} else {
- // Handle Unscaled
- if (IsUnscaled)
- OffsetImm /= OffsetStride;
- MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(NewOpc))
- .addOperand(getLdStRegOp(RtMI))
- .addOperand(getLdStRegOp(Rt2MI))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm);
+ RtMI = &*I;
+ Rt2MI = &*Paired;
+ }
+ int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
+ // Scale the immediate offset, if necessary.
+ if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
+ assert(!(OffsetImm % getMemScale(*RtMI)) &&
+ "Unscaled offset cannot be scaled.");
+ OffsetImm /= getMemScale(*RtMI);
}
- (void)MIB;
+ // Construct the new instruction.
+ MachineInstrBuilder MIB;
+ DebugLoc DL = I->getDebugLoc();
+ MachineBasicBlock *MBB = I->getParent();
+ MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
+ .addOperand(getLdStRegOp(*RtMI))
+ .addOperand(getLdStRegOp(*Rt2MI))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*Paired));
- // FIXME: Do we need/want to copy the mem operands from the source
- // instructions? Probably. What uses them after this?
+ (void)MIB;
DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
-
if (SExtIdx != -1) {
// Generate the sign extension for the proper result of the ldp.
// I.e., with X1, that would be:
@@ -814,26 +899,23 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// Insert this definition right after the generated LDP, i.e., before
// InsertionPoint.
MachineInstrBuilder MIBKill =
- BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(TargetOpcode::KILL), DstRegW)
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
.addReg(DstRegW)
.addReg(DstRegX, RegState::Define);
MIBKill->getOperand(2).setImplicit();
// Create the sign extension.
MachineInstrBuilder MIBSXTW =
- BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::SBFMXri), DstRegX)
+ BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
.addReg(DstRegX)
.addImm(0)
.addImm(31);
(void)MIBSXTW;
DEBUG(dbgs() << " Extend operand:\n ");
DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
- DEBUG(dbgs() << "\n");
} else {
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
}
+ DEBUG(dbgs() << "\n");
// Erase the old instructions.
I->eraseFromParent();
@@ -848,10 +930,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator NextI = LoadI;
++NextI;
- int LoadSize = getMemScale(LoadI);
- int StoreSize = getMemScale(StoreI);
- unsigned LdRt = getLdStRegOp(LoadI).getReg();
- unsigned StRt = getLdStRegOp(StoreI).getReg();
+ int LoadSize = getMemScale(*LoadI);
+ int StoreSize = getMemScale(*StoreI);
+ unsigned LdRt = getLdStRegOp(*LoadI).getReg();
+ unsigned StRt = getLdStRegOp(*StoreI).getReg();
bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
assert((IsStoreXReg ||
@@ -881,15 +963,16 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
// performance and correctness are verified only in little-endian.
if (!Subtarget->isLittleEndian())
return NextI;
- bool IsUnscaled = isUnscaledLdSt(LoadI);
- assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
+ bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
+ assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
+ "Unsupported ld/st match");
assert(LoadSize <= StoreSize && "Invalid load size");
int UnscaledLdOffset = IsUnscaled
- ? getLdStOffsetOp(LoadI).getImm()
- : getLdStOffsetOp(LoadI).getImm() * LoadSize;
+ ? getLdStOffsetOp(*LoadI).getImm()
+ : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
int UnscaledStOffset = IsUnscaled
- ? getLdStOffsetOp(StoreI).getImm()
- : getLdStOffsetOp(StoreI).getImm() * StoreSize;
+ ? getLdStOffsetOp(*StoreI).getImm()
+ : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
int Imms = Immr + Width - 1;
@@ -926,6 +1009,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
.addImm(Imms);
}
}
+ (void)BitExtMI;
DEBUG(dbgs() << "Promoting load by replacing :\n ");
DEBUG(StoreI->print(dbgs()));
@@ -944,16 +1028,18 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
-static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
+static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
BitVector &UsedRegs,
const TargetRegisterInfo *TRI) {
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask())
ModifiedRegs.setBitsNotInMask(MO.getRegMask());
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
if (MO.isDef()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
ModifiedRegs.set(*AI);
@@ -968,38 +1054,42 @@ static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
- if (IsUnscaled)
+ if (IsUnscaled) {
+ // If the byte-offset isn't a multiple of the stride, there's no point
+ // trying to match it.
+ if (Offset % OffsetStride)
+ return false;
Offset /= OffsetStride;
-
+ }
return Offset <= 63 && Offset >= -64;
}
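// For illustration (assumed example, not from the patch itself): with 8-byte
// accesses the element offset must fit in 7 signed bits, so
//   ldr x0, [x2, #504]   // element offset 63
//   ldr x1, [x2, #512]   // element offset 64
// can still be paired into "ldp x0, x1, [x2, #504]", whereas a candidate pair
// whose smaller element offset is 64 or more is rejected here.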
// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint_64t to int when
-// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
+// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
-static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
+static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
const AArch64InstrInfo *TII) {
// One of the instructions must modify memory.
- if (!MIa->mayStore() && !MIb->mayStore())
+ if (!MIa.mayStore() && !MIb.mayStore())
return false;
// Both instructions must be memory operations.
- if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore())
+ if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
return false;
return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
}
-static bool mayAlias(MachineInstr *MIa,
+static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
const AArch64InstrInfo *TII) {
- for (auto &MIb : MemInsns)
- if (mayAlias(MIa, MIb, TII))
+ for (MachineInstr *MIb : MemInsns)
+ if (mayAlias(MIa, *MIb, TII))
return true;
return false;
@@ -1008,40 +1098,43 @@ static bool mayAlias(MachineInstr *MIa,
bool AArch64LoadStoreOpt::findMatchingStore(
MachineBasicBlock::iterator I, unsigned Limit,
MachineBasicBlock::iterator &StoreI) {
- MachineBasicBlock::iterator E = I->getParent()->begin();
+ MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator MBBI = I;
- MachineInstr *FirstMI = I;
- unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+ MachineInstr &LoadMI = *I;
+ unsigned BaseReg = getLdStBaseOp(LoadMI).getReg();
+
+ // If the load is the first instruction in the block, there's obviously
+ // not any matching store.
+ if (MBBI == B)
+ return false;
// Track which registers have been modified and used between the first insn
// and the second insn.
- BitVector ModifiedRegs, UsedRegs;
- ModifiedRegs.resize(TRI->getNumRegs());
- UsedRegs.resize(TRI->getNumRegs());
+ ModifiedRegs.reset();
+ UsedRegs.reset();
- for (unsigned Count = 0; MBBI != E && Count < Limit;) {
+ unsigned Count = 0;
+ do {
--MBBI;
- MachineInstr *MI = MBBI;
- // Skip DBG_VALUE instructions. Otherwise debug info can affect the
- // optimization by changing how far we scan.
- if (MI->isDebugValue())
- continue;
- // Now that we know this is a real instruction, count it.
- ++Count;
+ MachineInstr &MI = *MBBI;
+
+ // Don't count DBG_VALUE instructions towards the search limit.
+ if (!MI.isDebugValue())
+ ++Count;
// If the load instruction reads directly from the address to which the
// store instruction writes and the stored value is not modified, we can
// promote the load. Since we do not handle stores with pre-/post-index,
// it's unnecessary to check if BaseReg is modified by the store itself.
- if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
+ if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
BaseReg == getLdStBaseOp(MI).getReg() &&
- isLdOffsetInRangeOfSt(FirstMI, MI) &&
+ isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
!ModifiedRegs[getLdStRegOp(MI).getReg()]) {
StoreI = MBBI;
return true;
}
- if (MI->isCall())
+ if (MI.isCall())
return false;
// Update modified / uses register lists.
@@ -1053,139 +1146,165 @@ bool AArch64LoadStoreOpt::findMatchingStore(
return false;
// If we encounter a store aliased with the load, return early.
- if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
+ if (MI.mayStore() && mayAlias(LoadMI, MI, TII))
return false;
- }
+ } while (MBBI != B && Count < Limit);
return false;
}
-/// findMatchingInsn - Scan the instructions looking for a load/store that can
-/// be combined with the current instruction into a load/store pair.
+// Returns true if FirstMI and MI are candidates for merging or pairing.
+// Otherwise, returns false.
+static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
+ LdStPairFlags &Flags,
+ const AArch64InstrInfo *TII) {
+ // If this is volatile or if pairing is suppressed, not a candidate.
+ if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
+ return false;
+
+ // We should have already checked FirstMI for pair suppression and volatility.
+ assert(!FirstMI.hasOrderedMemoryRef() &&
+ !TII->isLdStPairSuppressed(FirstMI) &&
+ "FirstMI shouldn't get here if either of these checks are true.");
+
+ unsigned OpcA = FirstMI.getOpcode();
+ unsigned OpcB = MI.getOpcode();
+
+ // Opcodes match: nothing more to check.
+ if (OpcA == OpcB)
+ return true;
+
+ // Try to match a sign-extended load/store with a zero-extended load/store.
+ bool IsValidLdStrOpc, PairIsValidLdStrOpc;
+ unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
+ assert(IsValidLdStrOpc &&
+ "Given Opc should be a Load or Store with an immediate");
+ // OpcA will be the first instruction in the pair.
+ if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
+ Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
+ return true;
+ }
+
+ // If the second instruction isn't even a load/store, bail out.
+ if (!PairIsValidLdStrOpc)
+ return false;
+
+ // FIXME: We don't support merging narrow loads/stores with mixed
+ // scaled/unscaled offsets.
+ if (isNarrowLoadOrStore(OpcA) || isNarrowLoadOrStore(OpcB))
+ return false;
+
+ // Try to match an unscaled load/store with a scaled load/store.
+ return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) &&
+ getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
+
+ // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
+}
+
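// For illustration (assumed example, not from the patch itself): the final
// check above also admits mixed scaled/unscaled candidates such as
//   ldr  x0, [x2]
//   ldur x1, [x2, #8]
// which share LDPXi as their pair opcode and can be merged into
//   ldp x0, x1, [x2]
// once the unscaled byte offset is converted to an element offset.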
+/// Scan the instructions looking for a load/store that can be combined with the
+/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- LdStPairFlags &Flags, unsigned Limit) {
+ LdStPairFlags &Flags, unsigned Limit,
+ bool FindNarrowMerge) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
- MachineInstr *FirstMI = I;
+ MachineInstr &FirstMI = *I;
++MBBI;
- unsigned Opc = FirstMI->getOpcode();
- bool MayLoad = FirstMI->mayLoad();
- bool IsUnscaled = isUnscaledLdSt(FirstMI);
+ bool MayLoad = FirstMI.mayLoad();
+ bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
unsigned Reg = getLdStRegOp(FirstMI).getReg();
unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
- bool IsNarrowStore = isNarrowStore(Opc);
-
- // For narrow stores, find only the case where the stored value is WZR.
- if (IsNarrowStore && Reg != AArch64::WZR)
- return E;
-
- // Early exit if the first instruction modifies the base register.
- // e.g., ldr x0, [x0]
- if (FirstMI->modifiesRegister(BaseReg, TRI))
- return E;
-
- // Early exit if the offset if not possible to match. (6 bits of positive
- // range, plus allow an extra one in case we find a later insn that matches
- // with Offset-1)
int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
- if (!(isNarrowLoad(Opc) || IsNarrowStore) &&
- !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
- return E;
+ bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
- BitVector ModifiedRegs, UsedRegs;
- ModifiedRegs.resize(TRI->getNumRegs());
- UsedRegs.resize(TRI->getNumRegs());
+ ModifiedRegs.reset();
+ UsedRegs.reset();
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
- MachineInstr *MI = MBBI;
+ MachineInstr &MI = *MBBI;
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
// optimization by changing how far we scan.
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
continue;
// Now that we know this is a real instruction, count it.
++Count;
- bool CanMergeOpc = Opc == MI->getOpcode();
Flags.setSExtIdx(-1);
- if (!CanMergeOpc) {
- bool IsValidLdStrOpc;
- unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
- assert(IsValidLdStrOpc &&
- "Given Opc should be a Load or Store with an immediate");
- // Opc will be the first instruction in the pair.
- Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
- CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
- }
-
- if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
- assert(MI->mayLoadOrStore() && "Expected memory operation.");
+ if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
+ getLdStOffsetOp(MI).isImm()) {
+ assert(MI.mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for
// a relocation.
- //
- // Pairwise instructions have a 7-bit signed offset field. Single insns
- // have a 12-bit unsigned offset field. To be a valid combine, the
- // final offset must be in range.
unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
int MIOffset = getLdStOffsetOp(MI).getImm();
+ bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
+ if (IsUnscaled != MIIsUnscaled) {
+ // We're trying to pair instructions that differ in how they are scaled.
+ // If FirstMI is scaled then scale the offset of MI accordingly.
+ // Otherwise, do the opposite (i.e., make MI's offset unscaled).
+ int MemSize = getMemScale(MI);
+ if (MIIsUnscaled) {
+ // If the unscaled offset isn't a multiple of the MemSize, we can't
+ // pair the operations together: bail and keep looking.
+ if (MIOffset % MemSize)
+ continue;
+ MIOffset /= MemSize;
+ } else {
+ MIOffset *= MemSize;
+ }
+ }
+
if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
(Offset + OffsetStride == MIOffset))) {
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
- // If this is a volatile load/store that otherwise matched, stop looking
- // as something is going on that we don't have enough information to
- // safely transform. Similarly, stop if we see a hint to avoid pairs.
- if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
- return E;
- // If the resultant immediate offset of merging these instructions
- // is out of range for a pairwise instruction, bail and keep looking.
- bool MIIsUnscaled = isUnscaledLdSt(MI);
- bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
- if (!IsNarrowLoad &&
- !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- MemInsns.push_back(MI);
- continue;
- }
-
- if (IsNarrowLoad || IsNarrowStore) {
+ if (FindNarrowMerge) {
// If the alignment requirements of the scaled wide load/store
- // instruction can't express the offset of the scaled narrow
- // input, bail and keep looking.
- if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
+ // instruction can't express the offset of the scaled narrow input,
+ // bail and keep looking. For promotable zero stores, allow only when
+ // the stored value is the same (i.e., WZR).
+ if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
+ (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- MemInsns.push_back(MI);
+ MemInsns.push_back(&MI);
continue;
}
} else {
+ // Pairwise instructions have a 7-bit signed offset field. Single
+ // insns have a 12-bit unsigned offset field. If the resultant
+ // immediate offset of merging these instructions is out of range for
+ // a pairwise instruction, bail and keep looking.
+ if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
// If the alignment requirements of the paired (scaled) instruction
// can't express the offset of the unscaled input, bail and keep
// looking.
if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- MemInsns.push_back(MI);
+ MemInsns.push_back(&MI);
continue;
}
}
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- // For narrow stores, allow only when the stored value is the same
- // (i.e., WZR).
- if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
- (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
+ if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- MemInsns.push_back(MI);
+ MemInsns.push_back(&MI);
continue;
}
@@ -1194,7 +1313,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// and first alias with the second, we can combine the second into the
// first.
if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
- !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
+ !(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
!mayAlias(MI, MemInsns, TII)) {
Flags.setMergeForward(false);
return MBBI;
@@ -1217,7 +1336,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
- if (MI->isCall())
+ if (MI.isCall())
return E;
// Update modified / uses register lists.
@@ -1229,8 +1348,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
return E;
// Update list of instructions that read/write memory.
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ if (MI.mayLoadOrStore())
+ MemInsns.push_back(&MI);
}
return E;
}
@@ -1258,22 +1377,24 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
: getPostIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB;
- if (!isPairedLdSt(I)) {
+ if (!isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(Update))
- .addOperand(getLdStRegOp(I))
- .addOperand(getLdStBaseOp(I))
- .addImm(Value);
+ .addOperand(getLdStRegOp(*Update))
+ .addOperand(getLdStRegOp(*I))
+ .addOperand(getLdStBaseOp(*I))
+ .addImm(Value)
+ .setMemRefs(I->memoperands_begin(), I->memoperands_end());
} else {
// Paired instruction.
- int Scale = getMemScale(I);
+ int Scale = getMemScale(*I);
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(Update))
- .addOperand(getLdStRegOp(I, 0))
- .addOperand(getLdStRegOp(I, 1))
- .addOperand(getLdStBaseOp(I))
- .addImm(Value / Scale);
+ .addOperand(getLdStRegOp(*Update))
+ .addOperand(getLdStRegOp(*I, 0))
+ .addOperand(getLdStRegOp(*I, 1))
+ .addOperand(getLdStBaseOp(*I))
+ .addImm(Value / Scale)
+ .setMemRefs(I->memoperands_begin(), I->memoperands_end());
}
(void)MIB;
@@ -1296,10 +1417,10 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
return NextI;
}
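// For illustration (assumed example, not from the patch itself): for a paired
// instruction the writeback amount is stored divided by the access size, so
//   ldp x0, x1, [x2]
//   add x2, x2, #16
// becomes the post-indexed form
//   ldp x0, x1, [x2], #16
// with 16 / 8 = 2 recorded as the immediate operand of the new instruction.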
-bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
- MachineInstr *MI,
+bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
+ MachineInstr &MI,
unsigned BaseReg, int Offset) {
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::SUBXri:
@@ -1309,20 +1430,20 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
case AArch64::ADDXri:
// Make sure it's a vanilla immediate operand, not a relocation or
// anything else we can't handle.
- if (!MI->getOperand(2).isImm())
+ if (!MI.getOperand(2).isImm())
break;
// Watch out for 1 << 12 shifted value.
- if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
+ if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
break;
// The update instruction source and destination register must be the
// same as the load/store base register.
- if (MI->getOperand(0).getReg() != BaseReg ||
- MI->getOperand(1).getReg() != BaseReg)
+ if (MI.getOperand(0).getReg() != BaseReg ||
+ MI.getOperand(1).getReg() != BaseReg)
break;
bool IsPairedInsn = isPairedLdSt(MemMI);
- int UpdateOffset = MI->getOperand(2).getImm();
+ int UpdateOffset = MI.getOperand(2).getImm();
// For non-paired load/store instructions, the immediate must fit in a
// signed 9-bit integer.
if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
@@ -1343,7 +1464,7 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
// If we have a non-zero Offset, we check that it matches the amount
// we're adding to the register.
- if (!Offset || Offset == MI->getOperand(2).getImm())
+ if (!Offset || Offset == MI.getOperand(2).getImm())
return true;
break;
}
@@ -1351,9 +1472,9 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
}
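// For illustration (assumed example, not from the patch itself): for a
// non-paired load the update must fit the signed 9-bit writeback range checked
// above, so
//   ldr x0, [x2]
//   add x2, x2, #255
// can fold into "ldr x0, [x2], #255", while an increment of #256 or an ADD
// using the shifted "lsl #12" form is rejected.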
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
- MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) {
+ MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
- MachineInstr *MemMI = I;
+ MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
@@ -1376,22 +1497,20 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
- BitVector ModifiedRegs, UsedRegs;
- ModifiedRegs.resize(TRI->getNumRegs());
- UsedRegs.resize(TRI->getNumRegs());
+ ModifiedRegs.reset();
+ UsedRegs.reset();
++MBBI;
- for (unsigned Count = 0; MBBI != E; ++MBBI) {
- MachineInstr *MI = MBBI;
- // Skip DBG_VALUE instructions. Otherwise debug info can affect the
- // optimization by changing how far we scan.
- if (MI->isDebugValue())
+ for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ // Skip DBG_VALUE instructions.
+ if (MI.isDebugValue())
continue;
// Now that we know this is a real instruction, count it.
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset))
+ if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -1409,7 +1528,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator I, unsigned Limit) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
- MachineInstr *MemMI = I;
+ MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
@@ -1430,22 +1549,19 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
- BitVector ModifiedRegs, UsedRegs;
- ModifiedRegs.resize(TRI->getNumRegs());
- UsedRegs.resize(TRI->getNumRegs());
- --MBBI;
- for (unsigned Count = 0; MBBI != B; --MBBI) {
- MachineInstr *MI = MBBI;
- // Skip DBG_VALUE instructions. Otherwise debug info can affect the
- // optimization by changing how far we scan.
- if (MI->isDebugValue())
- continue;
+ ModifiedRegs.reset();
+ UsedRegs.reset();
+ unsigned Count = 0;
+ do {
+ --MBBI;
+ MachineInstr &MI = *MBBI;
- // Now that we know this is a real instruction, count it.
- ++Count;
+ // Don't count DBG_VALUE instructions towards the search limit.
+ if (!MI.isDebugValue())
+ ++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(I, MI, BaseReg, Offset))
+ if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -1455,15 +1571,15 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// return early.
if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
return E;
- }
+ } while (MBBI != B && Count < Limit);
return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {
- MachineInstr *MI = MBBI;
+ MachineInstr &MI = *MBBI;
// If this is a volatile load, don't mess with it.
- if (MI->hasOrderedMemoryRef())
+ if (MI.hasOrderedMemoryRef())
return false;
// Make sure this is a reg+imm.
@@ -1471,9 +1587,9 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
if (!getLdStOffsetOp(MI).isImm())
return false;
- // Look backward up to ScanLimit instructions.
+ // Look backward up to LdStLimit instructions.
MachineBasicBlock::iterator StoreI;
- if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
+ if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
++NumLoadsFromStoresPromoted;
// Promote the load. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
@@ -1484,40 +1600,70 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
return false;
}
+// Find narrow loads that can be converted into a single wider load with
+// bitfield extract instructions. Also merge adjacent zero stores into a wider
+// store.
bool AArch64LoadStoreOpt::tryToMergeLdStInst(
MachineBasicBlock::iterator &MBBI) {
- MachineInstr *MI = MBBI;
- MachineBasicBlock::iterator E = MI->getParent()->end();
- // If this is a volatile load/store, don't mess with it.
- if (MI->hasOrderedMemoryRef())
- return false;
+ assert((isNarrowLoad(*MBBI) || isPromotableZeroStoreOpcode(*MBBI)) &&
+ "Expected narrow op.");
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator E = MI.getParent()->end();
- // Make sure this is a reg+imm (as opposed to an address reloc).
- if (!getLdStOffsetOp(MI).isImm())
+ if (!TII->isCandidateToMergeOrPair(MI))
return false;
- // Check if this load/store has a hint to avoid pair formation.
- // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
- if (TII->isLdStPairSuppressed(MI))
+ // For promotable zero stores, the stored value should be WZR.
+ if (isPromotableZeroStoreOpcode(MI) &&
+ getLdStRegOp(MI).getReg() != AArch64::WZR)
return false;
- // Look ahead up to ScanLimit instructions for a pairable instruction.
+ // Look ahead up to LdStLimit instructions for a mergable instruction.
LdStPairFlags Flags;
- MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit);
- if (Paired != E) {
+ MachineBasicBlock::iterator MergeMI =
+ findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
+ if (MergeMI != E) {
if (isNarrowLoad(MI)) {
++NumNarrowLoadsPromoted;
- } else if (isNarrowStore(MI)) {
+ } else if (isPromotableZeroStoreInst(MI)) {
++NumZeroStoresPromoted;
- } else {
- ++NumPairCreated;
- if (isUnscaledLdSt(MI))
- ++NumUnscaledPairCreated;
}
+ // Keeping the iterator straight is a pain, so we let the merge routine tell
+ // us what the next instruction is after it's done mucking about.
+ MBBI = mergeNarrowInsns(MBBI, MergeMI, Flags);
+ return true;
+ }
+ return false;
+}
- // Merge the loads into a pair. Keeping the iterator straight is a
- // pain, so we let the merge routine tell us what the next instruction
- // is after it's done mucking about.
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator E = MI.getParent()->end();
+
+ if (!TII->isCandidateToMergeOrPair(MI))
+ return false;
+
+ // Early exit if the offset is not possible to match. (6 bits of positive
+ // range, plus allow an extra one in case we find a later insn that matches
+ // with Offset-1)
+ bool IsUnscaled = TII->isUnscaledLdSt(MI);
+ int Offset = getLdStOffsetOp(MI).getImm();
+ int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+ if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
+ return false;
+
+ // Look ahead up to LdStLimit instructions for a pairable instruction.
+ LdStPairFlags Flags;
+ MachineBasicBlock::iterator Paired =
+ findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
+ if (Paired != E) {
+ ++NumPairCreated;
+ if (TII->isUnscaledLdSt(MI))
+ ++NumUnscaledPairCreated;
+ // Keeping the iterator straight is a pain, so we let the merge routine tell
+ // us what the next instruction is after it's done mucking about.
MBBI = mergePairedInsns(MBBI, Paired, Flags);
return true;
}
@@ -1527,7 +1673,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool enableNarrowLdOpt) {
bool Modified = false;
- // Three tranformations to do here:
  // Four transformations to do here:
// 1) Find loads that directly read from stores and promote them by
// replacing with mov instructions. If the store is wider than the load,
// the load will be replaced with a bitfield extract.
@@ -1536,35 +1682,11 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldrh w2, [x0, #6]
// ; becomes
// str w1, [x0, #4]
- // lsr w2, w1, #16
- // 2) Find narrow loads that can be converted into a single wider load
- // with bitfield extract instructions.
- // e.g.,
- // ldrh w0, [x2]
- // ldrh w1, [x2, #2]
- // ; becomes
- // ldr w0, [x2]
- // ubfx w1, w0, #16, #16
- // and w0, w0, #ffff
- // 3) Find loads and stores that can be merged into a single load or store
- // pair instruction.
- // e.g.,
- // ldr x0, [x2]
- // ldr x1, [x2, #8]
- // ; becomes
- // ldp x0, x1, [x2]
- // 4) Find base register updates that can be merged into the load or store
- // as a base-reg writeback.
- // e.g.,
- // ldr x0, [x2]
- // add x2, x2, #4
- // ; becomes
- // ldr x0, [x2], #4
-
+ // lsr w2, w1, #16
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr *MI = MBBI;
- switch (MI->getOpcode()) {
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
default:
// Just move on to the next instruction.
++MBBI;
@@ -1586,47 +1708,49 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
break;
}
- // FIXME: Do the other instructions.
}
}
-
+ // 2) Find narrow loads that can be converted into a single wider load
+ // with bitfield extract instructions.
+ // e.g.,
+ // ldrh w0, [x2]
+ // ldrh w1, [x2, #2]
+ // ; becomes
+ // ldr w0, [x2]
+ // ubfx w1, w0, #16, #16
+ // and w0, w0, #ffff
+ //
+ // Also merge adjacent zero stores into a wider store.
+ // e.g.,
+ // strh wzr, [x0]
+ // strh wzr, [x0, #2]
+ // ; becomes
+ // str wzr, [x0]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
enableNarrowLdOpt && MBBI != E;) {
- MachineInstr *MI = MBBI;
- switch (MI->getOpcode()) {
- default:
- // Just move on to the next instruction.
- ++MBBI;
- break;
- // Scaled instructions.
- case AArch64::LDRBBui:
- case AArch64::LDRHHui:
- case AArch64::LDRSBWui:
- case AArch64::LDRSHWui:
- case AArch64::STRBBui:
- case AArch64::STRHHui:
- // Unscaled instructions.
- case AArch64::LDURBBi:
- case AArch64::LDURHHi:
- case AArch64::LDURSBWi:
- case AArch64::LDURSHWi:
- case AArch64::STURBBi:
- case AArch64::STURHHi: {
+ MachineInstr &MI = *MBBI;
+ unsigned Opc = MI.getOpcode();
+ if (isPromotableZeroStoreOpcode(Opc) ||
+ (EnableNarrowLdMerge && isNarrowLoad(Opc))) {
if (tryToMergeLdStInst(MBBI)) {
Modified = true;
- break;
- }
+ } else
+ ++MBBI;
+ } else
++MBBI;
- break;
- }
- // FIXME: Do the other instructions.
- }
}
+ // 3) Find loads and stores that can be merged into a single load or store
+ // pair instruction.
+ // e.g.,
+ // ldr x0, [x2]
+ // ldr x1, [x2, #8]
+ // ; becomes
+ // ldp x0, x1, [x2]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr *MI = MBBI;
- switch (MI->getOpcode()) {
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
default:
// Just move on to the next instruction.
++MBBI;
@@ -1655,23 +1779,28 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
case AArch64::LDURWi:
case AArch64::LDURXi:
case AArch64::LDURSWi: {
- if (tryToMergeLdStInst(MBBI)) {
+ if (tryToPairLdStInst(MBBI)) {
Modified = true;
break;
}
++MBBI;
break;
}
- // FIXME: Do the other instructions.
}
}
-
+ // 4) Find base register updates that can be merged into the load or store
+ // as a base-reg writeback.
+ // e.g.,
+ // ldr x0, [x2]
+ // add x2, x2, #4
+ // ; becomes
+ // ldr x0, [x2], #4
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr *MI = MBBI;
+ MachineInstr &MI = *MBBI;
// Do update merging. It's simpler to keep this separate from the above
- // switch, though not strictly necessary.
- unsigned Opc = MI->getOpcode();
+    // switches, though not strictly necessary.
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
// Just move on to the next instruction.
@@ -1726,7 +1855,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// merged into:
// ldr x0, [x20], #32
MachineBasicBlock::iterator Update =
- findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
+ findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
@@ -1736,7 +1865,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
}
// Don't know how to handle pre/post-index versions, so move to the next
// instruction.
- if (isUnscaledLdSt(Opc)) {
+ if (TII->isUnscaledLdSt(Opc)) {
++MBBI;
break;
}
@@ -1746,7 +1875,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldr x1, [x0]
// merged into:
// ldr x1, [x0, #8]!
- Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
+ Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
@@ -1764,7 +1893,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// add x0, x0, #64
// merged into:
// ldr x1, [x0, #64]!
- Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset);
+ Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
@@ -1777,29 +1906,29 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
break;
}
- // FIXME: Do the other instructions.
}
}
return Modified;
}
-bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
- bool ProfitableArch = Subtarget->isCortexA57();
- // FIXME: The benefit from converting narrow loads into a wider load could be
- // microarchitectural as it assumes that a single load with two bitfield
- // extracts is cheaper than two narrow loads. Currently, this conversion is
- // enabled only in cortex-a57 on which performance benefits were verified.
- return ProfitableArch && !Subtarget->requiresStrictAlign();
-}
-
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
TRI = Subtarget->getRegisterInfo();
+ // Resize the modified and used register bitfield trackers. We do this once
+ // per function and then clear the bitfield each time we optimize a load or
+ // store.
+ ModifiedRegs.resize(TRI->getNumRegs());
+ UsedRegs.resize(TRI->getNumRegs());
+
bool Modified = false;
- bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
+ bool enableNarrowLdOpt =
+ Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
for (auto &MBB : Fn)
Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
@@ -1809,6 +1938,11 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?
+// FIXME: When pairing store instructions it's very possible for this pass to
+// hoist a store with a KILL marker above another use (without a KILL marker).
+// The resulting IR is invalid, but nothing uses the KILL markers after this
+// pass, so it's never caused a problem in practice.
+
/// createAArch64LoadStoreOptimizationPass - returns an instance of the
/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {