| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
| commit | 2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch) | |
| tree | 524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | |
| parent | 6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff) | |
| parent | 044eb2f6afba375a914ac9d8024f8f5142bb912e (diff) | |
Merge llvm trunk r321017 to contrib/llvm.
Notes:
svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 304 |
1 file changed, 152 insertions, 152 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 9a7f45bde6c9..8a29456430b9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
+//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -20,12 +20,14 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
@@ -33,7 +35,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -64,7 +65,7 @@ static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
 
 namespace {
 
-typedef struct LdStPairFlags {
+using LdStPairFlags = struct LdStPairFlags {
   // If a matching instruction is found, MergeForward is set to true if the
   // merge is to remove the first instruction and replace the second with
   // a pair-wise insn, and false if the reverse is true.
@@ -83,8 +84,7 @@ typedef struct LdStPairFlags {
 
   void setSExtIdx(int V) { SExtIdx = V; }
   int getSExtIdx() const { return SExtIdx; }
-
-} LdStPairFlags;
+};
 
 struct AArch64LoadStoreOpt : public MachineFunctionPass {
   static char ID;
@@ -101,7 +101,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Track which registers have been modified and used.
   BitVector ModifiedRegs, UsedRegs;
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AAResultsWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -168,6 +168,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and promote load instructions which read directly from store.
   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
 
+  // Find and merge a base register updates before or after a ld/st instruction.
+  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+
   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -578,6 +581,75 @@ static bool isPromotableZeroStoreInst(MachineInstr &MI) {
          getLdStRegOp(MI).getReg() == AArch64::WZR;
 }
 
+static bool isPromotableLoadFromStore(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::LDRBBui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  // Unscaled instructions.
+  case AArch64::LDURBBi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+    return true;
+  }
+}
+
+static bool isMergeableLdStUpdate(MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+  case AArch64::STRXui:
+  case AArch64::STRWui:
+  case AArch64::STRHHui:
+  case AArch64::STRBBui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+  case AArch64::LDRXui:
+  case AArch64::LDRWui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRBBui:
+  // Unscaled instructions.
+  case AArch64::STURSi:
+  case AArch64::STURDi:
+  case AArch64::STURQi:
+  case AArch64::STURWi:
+  case AArch64::STURXi:
+  case AArch64::LDURSi:
+  case AArch64::LDURDi:
+  case AArch64::LDURQi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+  // Paired instructions.
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPDi:
+  case AArch64::LDPQi:
+  case AArch64::LDPWi:
+  case AArch64::LDPXi:
+  case AArch64::STPSi:
+  case AArch64::STPDi:
+  case AArch64::STPQi:
+  case AArch64::STPWi:
+  case AArch64::STPXi:
+    // Make sure this is a reg+imm (as opposed to an address reloc).
+    if (!getLdStOffsetOp(MI).isImm())
+      return false;
+
+    return true;
+  }
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                            MachineBasicBlock::iterator MergeMI,
@@ -758,8 +830,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   if (SExtIdx != -1) {
     // Generate the sign extension for the proper result of the ldp.
     // I.e., with X1, that would be:
-    // %W1<def> = KILL %W1, %X1<imp-def>
-    // %X1<def> = SBFMXri %X1<kill>, 0, 31
+    // %w1 = KILL %w1, implicit-def %x1
+    // %x1 = SBFMXri killed %x1, 0, 31
     MachineOperand &DstMO = MIB->getOperand(SExtIdx);
     // Right now, DstMO has the extended register, since it comes from an
     // extended opcode.
@@ -1294,10 +1366,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
   }
   (void)MIB;
 
-  if (IsPreIdx)
+  if (IsPreIdx) {
+    ++NumPreFolded;
     DEBUG(dbgs() << "Creating pre-indexed load/store.");
-  else
+  } else {
+    ++NumPostFolded;
     DEBUG(dbgs() << "Creating post-indexed load/store.");
+  }
   DEBUG(dbgs() << "    Replacing instructions:\n    ");
   DEBUG(I->print(dbgs()));
   DEBUG(dbgs() << "    ");
@@ -1558,6 +1633,60 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   return false;
 }
 
+bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
+    (MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  MachineBasicBlock::iterator Update;
+
+  // Look forward to try to form a post-index instruction. For example,
+  // ldr x0, [x20]
+  // add x20, x20, #32
+  //   merged into:
+  // ldr x0, [x20], #32
+  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+    return true;
+  }
+
+  // Don't know how to handle unscaled pre/post-index versions below, so bail.
+  if (TII->isUnscaledLdSt(MI.getOpcode()))
+    return false;
+
+  // Look back to try to find a pre-index instruction. For example,
+  // add x0, x0, #8
+  // ldr x1, [x0]
+  //   merged into:
+  // ldr x1, [x0, #8]!
+  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  // The immediate in the load/store is scaled by the size of the memory
+  // operation. The immediate in the add we're looking for,
+  // however, is not, so adjust here.
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+
+  // Look forward to try to find a post-index instruction. For example,
+  // ldr x1, [x0, #64]
+  // add x0, x0, #64
+  //   merged into:
+  // ldr x1, [x0, #64]!
+  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool EnableNarrowZeroStOpt) {
   bool Modified = false;
@@ -1573,29 +1702,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        lsr w2, w1, #16
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    switch (MI.getOpcode()) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::LDRBBui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRWui:
-    case AArch64::LDRXui:
-    // Unscaled instructions.
-    case AArch64::LDURBBi:
-    case AArch64::LDURHHi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-      if (tryToPromoteLoadFromStore(MBBI)) {
-        Modified = true;
-        break;
-      }
+    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
   }
   // 2) Merge adjacent zero stores into a wider store.
   //      e.g.,
@@ -1608,17 +1718,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        str wzr, [x0, #4]
   //        ; becomes
   //        str xzr, [x0]
-  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-       EnableNarrowZeroStOpt && MBBI != E;) {
-    if (isPromotableZeroStoreInst(*MBBI)) {
-      if (tryToMergeZeroStInst(MBBI)) {
+  if (EnableNarrowZeroStOpt)
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+         MBBI != E;) {
+      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
-      } else
+      else
        ++MBBI;
-    } else
-      ++MBBI;
-  }
-
+    }
   // 3) Find loads and stores that can be merged into a single load or store
   //    pair instruction.
   //      e.g.,
@@ -1642,124 +1749,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        ldr x0, [x2], #4
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    // Do update merging. It's simpler to keep this separate from the above
-    // switchs, though not strictly necessary.
-    unsigned Opc = MI.getOpcode();
-    switch (Opc) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::STRSui:
-    case AArch64::STRDui:
-    case AArch64::STRQui:
-    case AArch64::STRXui:
-    case AArch64::STRWui:
-    case AArch64::STRHHui:
-    case AArch64::STRBBui:
-    case AArch64::LDRSui:
-    case AArch64::LDRDui:
-    case AArch64::LDRQui:
-    case AArch64::LDRXui:
-    case AArch64::LDRWui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRBBui:
-    // Unscaled instructions.
-    case AArch64::STURSi:
-    case AArch64::STURDi:
-    case AArch64::STURQi:
-    case AArch64::STURWi:
-    case AArch64::STURXi:
-    case AArch64::LDURSi:
-    case AArch64::LDURDi:
-    case AArch64::LDURQi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-    // Paired instructions.
-    case AArch64::LDPSi:
-    case AArch64::LDPSWi:
-    case AArch64::LDPDi:
-    case AArch64::LDPQi:
-    case AArch64::LDPWi:
-    case AArch64::LDPXi:
-    case AArch64::STPSi:
-    case AArch64::STPDi:
-    case AArch64::STPQi:
-    case AArch64::STPWi:
-    case AArch64::STPXi: {
-      // Make sure this is a reg+imm (as opposed to an address reloc).
-      if (!getLdStOffsetOp(MI).isImm()) {
-        ++MBBI;
-        break;
-      }
-      // Look forward to try to form a post-index instruction. For example,
-      // ldr x0, [x20]
-      // add x20, x20, #32
-      //   merged into:
-      // ldr x0, [x20], #32
-      MachineBasicBlock::iterator Update =
-          findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
-        Modified = true;
-        ++NumPostFolded;
-        break;
-      }
-
-      // Don't know how to handle unscaled pre/post-index versions below, so
-      // move to the next instruction.
-      if (TII->isUnscaledLdSt(Opc)) {
-        ++MBBI;
-        break;
-      }
-
-      // Look back to try to find a pre-index instruction. For example,
-      // add x0, x0, #8
-      // ldr x1, [x0]
-      //   merged into:
-      // ldr x1, [x0, #8]!
-      Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-      // The immediate in the load/store is scaled by the size of the memory
-      // operation. The immediate in the add we're looking for,
-      // however, is not, so adjust here.
-      int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
-
-      // Look forward to try to find a post-index instruction. For example,
-      // ldr x1, [x0, #64]
-      // add x0, x0, #64
-      //   merged into:
-      // ldr x1, [x0, #64]!
-      Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-
-      // Nothing found. Just move to the next instruction.
+    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
-    }
   }
 
   return Modified;
 }
 
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  if (skipFunction(*Fn.getFunction()))
+  if (skipFunction(Fn.getFunction()))
     return false;
 
   Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
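The recurring change in optimizeBlock above is one refactoring applied to each scan loop: the long per-opcode switch is hoisted into a predicate (isPromotableLoadFromStore, isMergeableLdStUpdate), and the loop body collapses to "if the predicate holds and the rewrite succeeds, record the change; otherwise advance the iterator". Below is a minimal, self-contained sketch of that loop shape; Instr, isMergeable() and tryToMerge() are stand-ins invented for illustration, not LLVM APIs.

```cpp
// Simplified sketch of the "predicate + merge-or-advance" loop shape used in
// optimizeBlock after this patch. Instr, isMergeable() and tryToMerge() are
// stand-ins for MachineInstr, isMergeableLdStUpdate() and
// tryToMergeLdStUpdate(); they are NOT LLVM APIs.
#include <iostream>
#include <list>

struct Instr {
  int Opcode;
};

// Cheap opcode test, analogous to isMergeableLdStUpdate().
static bool isMergeable(const Instr &I) { return I.Opcode % 2 == 0; }

// Attempt the rewrite. On success the iterator is repositioned at the next
// instruction to visit (here we simply erase the current one), mirroring how
// mergeUpdateInsn() returns the iterator the caller should continue from.
static bool tryToMerge(std::list<Instr> &Block, std::list<Instr>::iterator &It) {
  It = Block.erase(It);
  return true;
}

int main() {
  std::list<Instr> Block{{1}, {2}, {3}, {4}};
  bool Modified = false;

  for (auto MBBI = Block.begin(), E = Block.end(); MBBI != E;) {
    if (isMergeable(*MBBI) && tryToMerge(Block, MBBI))
      Modified = true;   // tryToMerge() already repositioned MBBI.
    else
      ++MBBI;            // Nothing to do here; move on.
  }

  std::cout << "modified: " << std::boolalpha << Modified
            << ", remaining: " << Block.size() << '\n';  // remaining: 2
}
```

The key property of this shape is that the rewrite helper, like mergeUpdateInsn() in the pass, is responsible for leaving the iterator at the next instruction to visit, so the loop neither skips nor revisits instructions regardless of which branch is taken.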