author     Dimitry Andric <dim@FreeBSD.org>  2017-12-20 14:16:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-12-20 14:16:56 +0000
commit     2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch)
tree       524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
parent     6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff)
parent     044eb2f6afba375a914ac9d8024f8f5142bb912e (diff)
Merge llvm trunk r321017 to contrib/llvm.
Notes: svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 304
1 file changed, 152 insertions(+), 152 deletions(-)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 9a7f45bde6c9..8a29456430b9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
+//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,12 +20,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -33,7 +35,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -64,7 +65,7 @@ static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
namespace {
-typedef struct LdStPairFlags {
+using LdStPairFlags = struct LdStPairFlags {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
@@ -83,8 +84,7 @@ typedef struct LdStPairFlags {
void setSExtIdx(int V) { SExtIdx = V; }
int getSExtIdx() const { return SExtIdx; }
-
-} LdStPairFlags;
+};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
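
[Note on the hunk above: C++11 explicitly permits defining a class inside an alias-declaration, so the new "using LdStPairFlags = struct LdStPairFlags { ... };" spelling is well-formed and equivalent to the old typedef. A minimal standalone sketch with a hypothetical Flags type:

    // Older spelling: one declaration introduces the class and the alias name.
    typedef struct FlagsA { bool MergeForward; } FlagsA;

    // C++11 alias-declaration spelling, as in the LdStPairFlags hunk above;
    // defining a class inside an alias-declaration is allowed by [dcl.type].
    using FlagsB = struct FlagsB { bool MergeForward; };
]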
@@ -101,7 +101,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which registers have been modified and used.
BitVector ModifiedRegs, UsedRegs;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
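
[Note on the getAnalysisUsage hunk: "override" already implies the function is virtual, so the leading "virtual" was redundant and is dropped per LLVM style. A minimal sketch with hypothetical classes:

    struct PassBase {
      virtual void getAnalysisUsage() const;
      virtual ~PassBase() = default;
    };

    struct MyPass : PassBase {
      // "override" alone both documents and enforces the override;
      // repeating "virtual" here adds nothing.
      void getAnalysisUsage() const override;
    };
]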
@@ -168,6 +168,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
+ // Find and merge base register updates before or after a ld/st instruction.
+ bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -578,6 +581,75 @@ static bool isPromotableZeroStoreInst(MachineInstr &MI) {
getLdStRegOp(MI).getReg() == AArch64::WZR;
}
+static bool isPromotableLoadFromStore(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Scaled instructions.
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ // Unscaled instructions.
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ return true;
+ }
+}
+
+static bool isMergeableLdStUpdate(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ // Scaled instructions.
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRBBui:
+ // Unscaled instructions.
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ // Paired instructions.
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ // Make sure this is a reg+imm (as opposed to an address reloc).
+ if (!getLdStOffsetOp(MI).isImm())
+ return false;
+
+ return true;
+ }
+}
+
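
[The two predicates above hoist opcode filtering out of optimizeBlock; the rewritten loops later in this diff call them as cheap guards before attempting a transform. A minimal standalone analogue of the fall-through-switch idiom they use, with a hypothetical opcode enum:

    enum Opcode { LDRXui, LDURXi, STRXui, BR };

    // Every candidate opcode falls through to a single "return true", so
    // extending the candidate set is a one-line change per opcode.
    static bool isCandidate(Opcode Op) {
      switch (Op) {
      default:
        return false;
      case LDRXui:
      case LDURXi:
      case STRXui:
        return true;
      }
    }
]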
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MergeMI,
@@ -758,8 +830,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (SExtIdx != -1) {
// Generate the sign extension for the proper result of the ldp.
// I.e., with X1, that would be:
- // %W1<def> = KILL %W1, %X1<imp-def>
- // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ // %w1 = KILL %w1, implicit-def %x1
+ // %x1 = SBFMXri killed %x1, 0, 31
MachineOperand &DstMO = MIB->getOperand(SExtIdx);
// Right now, DstMO has the extended register, since it comes from an
// extended opcode.
@@ -1294,10 +1366,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
}
(void)MIB;
- if (IsPreIdx)
+ if (IsPreIdx) {
+ ++NumPreFolded;
DEBUG(dbgs() << "Creating pre-indexed load/store.");
- else
+ } else {
+ ++NumPostFolded;
DEBUG(dbgs() << "Creating post-indexed load/store.");
+ }
DEBUG(dbgs() << " Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
@@ -1558,6 +1633,60 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
return false;
}
+bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
+    MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator E = MI.getParent()->end();
+ MachineBasicBlock::iterator Update;
+
+ // Look forward to try to form a post-index instruction. For example,
+ // ldr x0, [x20]
+ // add x20, x20, #32
+ // merged into:
+ // ldr x0, [x20], #32
+ Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
+ if (Update != E) {
+ // Merge the update into the ld/st.
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+ return true;
+ }
+
+ // Don't know how to handle unscaled pre/post-index versions below, so bail.
+ if (TII->isUnscaledLdSt(MI.getOpcode()))
+ return false;
+
+ // Look back to try to find a pre-index instruction. For example,
+ // add x0, x0, #8
+ // ldr x1, [x0]
+ // merged into:
+ // ldr x1, [x0, #8]!
+ Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+ if (Update != E) {
+ // Merge the update into the ld/st.
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+ return true;
+ }
+
+ // The immediate in the load/store is scaled by the size of the memory
+ // operation. The immediate in the add we're looking for,
+ // however, is not, so adjust here.
+ int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+
+ // Look forward to try to find a pre-index instruction. For example,
+ // ldr x1, [x0, #64]
+ // add x0, x0, #64
+ // merged into:
+ // ldr x1, [x0, #64]!
+ Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
+ if (Update != E) {
+ // Merge the update into the ld/st.
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+ return true;
+ }
+
+ return false;
+}
+
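
[One subtlety worth spelling out in the extracted function: for scaled forms such as LDRXui, the immediate stored in the instruction counts access-size units, while the add it must match counts bytes; hence the UnscaledOffset multiplication. A worked sketch with assumed values for an 8-byte load:

    // ldr x1, [x0, #64] is LDRXui with an encoded immediate of 8, because
    // the offset field counts 8-byte units for an X-register access.
    int unscaledOffset() {
      const int EncodedImm = 8;     // getLdStOffsetOp(MI).getImm()
      const int MemScale = 8;       // getMemScale(MI): access size in bytes
      return EncodedImm * MemScale; // 64, matching "add x0, x0, #64"
    }
]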
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
bool Modified = false;
@@ -1573,29 +1702,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// lsr w2, w1, #16
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- default:
- // Just move on to the next instruction.
- ++MBBI;
- break;
- // Scaled instructions.
- case AArch64::LDRBBui:
- case AArch64::LDRHHui:
- case AArch64::LDRWui:
- case AArch64::LDRXui:
- // Unscaled instructions.
- case AArch64::LDURBBi:
- case AArch64::LDURHHi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- if (tryToPromoteLoadFromStore(MBBI)) {
- Modified = true;
- break;
- }
+ if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
+ Modified = true;
+ else
++MBBI;
- break;
- }
}
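
[The rewritten loop above, and the two that follow, all rely on the same iterator contract: a successful tryTo* call repositions MBBI itself (past whatever it merged or erased), so the loop advances only when the transform declines. A minimal standalone sketch of that contract, with hypothetical types in place of the MachineInstr list:

    #include <list>

    using InstList = std::list<int>;

    // On success, the transform consumes the instruction and leaves the
    // iterator at the next one; the caller must not advance it again.
    static bool tryTransform(InstList &L, InstList::iterator &It) {
      if (*It % 2 != 0)
        return false; // not a candidate: the caller advances.
      It = L.erase(It);
      return true;
    }

    static void optimize(InstList &L) {
      for (auto It = L.begin(), E = L.end(); It != E;) {
        if (tryTransform(L, It))
          ; // iterator already advanced by the transform
        else
          ++It;
      }
    }
]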
// 2) Merge adjacent zero stores into a wider store.
// e.g.,
@@ -1608,17 +1718,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// str wzr, [x0, #4]
// ; becomes
// str xzr, [x0]
- for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- EnableNarrowZeroStOpt && MBBI != E;) {
- if (isPromotableZeroStoreInst(*MBBI)) {
- if (tryToMergeZeroStInst(MBBI)) {
+ if (EnableNarrowZeroStOpt)
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
Modified = true;
- } else
+ else
++MBBI;
- } else
- ++MBBI;
- }
-
+ }
// 3) Find loads and stores that can be merged into a single load or store
// pair instruction.
// e.g.,
@@ -1642,124 +1749,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldr x0, [x2], #4
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr &MI = *MBBI;
- // Do update merging. It's simpler to keep this separate from the above
- // switchs, though not strictly necessary.
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default:
- // Just move on to the next instruction.
- ++MBBI;
- break;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::STRHHui:
- case AArch64::STRBBui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRHHui:
- case AArch64::LDRBBui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURWi:
- case AArch64::STURXi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- // Paired instructions.
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPDi:
- case AArch64::LDPQi:
- case AArch64::LDPWi:
- case AArch64::LDPXi:
- case AArch64::STPSi:
- case AArch64::STPDi:
- case AArch64::STPQi:
- case AArch64::STPWi:
- case AArch64::STPXi: {
- // Make sure this is a reg+imm (as opposed to an address reloc).
- if (!getLdStOffsetOp(MI).isImm()) {
- ++MBBI;
- break;
- }
- // Look forward to try to form a post-index instruction. For example,
- // ldr x0, [x20]
- // add x20, x20, #32
- // merged into:
- // ldr x0, [x20], #32
- MachineBasicBlock::iterator Update =
- findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
- if (Update != E) {
- // Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
- Modified = true;
- ++NumPostFolded;
- break;
- }
-
- // Don't know how to handle unscaled pre/post-index versions below, so
- // move to the next instruction.
- if (TII->isUnscaledLdSt(Opc)) {
- ++MBBI;
- break;
- }
-
- // Look back to try to find a pre-index instruction. For example,
- // add x0, x0, #8
- // ldr x1, [x0]
- // merged into:
- // ldr x1, [x0, #8]!
- Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
- if (Update != E) {
- // Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- Modified = true;
- ++NumPreFolded;
- break;
- }
- // The immediate in the load/store is scaled by the size of the memory
- // operation. The immediate in the add we're looking for,
- // however, is not, so adjust here.
- int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
-
- // Look forward to try to find a post-index instruction. For example,
- // ldr x1, [x0, #64]
- // add x0, x0, #64
- // merged into:
- // ldr x1, [x0, #64]!
- Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
- if (Update != E) {
- // Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- Modified = true;
- ++NumPreFolded;
- break;
- }
-
- // Nothing found. Just move to the next instruction.
+ if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
+ Modified = true;
+ else
++MBBI;
- break;
- }
- }
}
return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- if (skipFunction(*Fn.getFunction()))
+ if (skipFunction(Fn.getFunction()))
return false;
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());