| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
| commit | 2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch) | |
| tree | 524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | |
| parent | 6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff) | |
| parent | 044eb2f6afba375a914ac9d8024f8f5142bb912e (diff) | |
Merge llvm trunk r321017 to contrib/llvm.
Notes:
svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 304 |
1 file changed, 152 insertions, 152 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 9a7f45bde6c9..8a29456430b9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
+//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -20,12 +20,14 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
@@ -33,7 +35,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -64,7 +65,7 @@ static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
 
 namespace {
 
-typedef struct LdStPairFlags {
+using LdStPairFlags = struct LdStPairFlags {
   // If a matching instruction is found, MergeForward is set to true if the
   // merge is to remove the first instruction and replace the second with
   // a pair-wise insn, and false if the reverse is true.
@@ -83,8 +84,7 @@ typedef struct LdStPairFlags {
 
   void setSExtIdx(int V) { SExtIdx = V; }
   int getSExtIdx() const { return SExtIdx; }
-
-} LdStPairFlags;
+};
 
 struct AArch64LoadStoreOpt : public MachineFunctionPass {
   static char ID;
@@ -101,7 +101,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Track which registers have been modified and used.
   BitVector ModifiedRegs, UsedRegs;
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AAResultsWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -168,6 +168,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and promote load instructions which read directly from store.
   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
 
+  // Find and merge a base register updates before or after a ld/st instruction.
+  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+
   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -578,6 +581,75 @@ static bool isPromotableZeroStoreInst(MachineInstr &MI) {
          getLdStRegOp(MI).getReg() == AArch64::WZR;
 }
 
+static bool isPromotableLoadFromStore(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::LDRBBui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  // Unscaled instructions.
+  case AArch64::LDURBBi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+    return true;
+  }
+}
+
+static bool isMergeableLdStUpdate(MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+  case AArch64::STRXui:
+  case AArch64::STRWui:
+  case AArch64::STRHHui:
+  case AArch64::STRBBui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+  case AArch64::LDRXui:
+  case AArch64::LDRWui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRBBui:
+  // Unscaled instructions.
+  case AArch64::STURSi:
+  case AArch64::STURDi:
+  case AArch64::STURQi:
+  case AArch64::STURWi:
+  case AArch64::STURXi:
+  case AArch64::LDURSi:
+  case AArch64::LDURDi:
+  case AArch64::LDURQi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+  // Paired instructions.
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPDi:
+  case AArch64::LDPQi:
+  case AArch64::LDPWi:
+  case AArch64::LDPXi:
+  case AArch64::STPSi:
+  case AArch64::STPDi:
+  case AArch64::STPQi:
+  case AArch64::STPWi:
+  case AArch64::STPXi:
+    // Make sure this is a reg+imm (as opposed to an address reloc).
+    if (!getLdStOffsetOp(MI).isImm())
+      return false;
+
+    return true;
+  }
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                            MachineBasicBlock::iterator MergeMI,
@@ -758,8 +830,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   if (SExtIdx != -1) {
     // Generate the sign extension for the proper result of the ldp.
     // I.e., with X1, that would be:
-    // %W1<def> = KILL %W1, %X1<imp-def>
-    // %X1<def> = SBFMXri %X1<kill>, 0, 31
+    // %w1 = KILL %w1, implicit-def %x1
+    // %x1 = SBFMXri killed %x1, 0, 31
     MachineOperand &DstMO = MIB->getOperand(SExtIdx);
     // Right now, DstMO has the extended register, since it comes from an
     // extended opcode.
@@ -1294,10 +1366,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
   }
   (void)MIB;
 
-  if (IsPreIdx)
+  if (IsPreIdx) {
+    ++NumPreFolded;
     DEBUG(dbgs() << "Creating pre-indexed load/store.");
-  else
+  } else {
+    ++NumPostFolded;
     DEBUG(dbgs() << "Creating post-indexed load/store.");
+  }
   DEBUG(dbgs() << "    Replacing instructions:\n    ");
   DEBUG(I->print(dbgs()));
   DEBUG(dbgs() << "    ");
@@ -1558,6 +1633,60 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   return false;
 }
 
+bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
+    (MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  MachineBasicBlock::iterator Update;
+
+  // Look forward to try to form a post-index instruction. For example,
+  // ldr x0, [x20]
+  // add x20, x20, #32
+  //   merged into:
+  // ldr x0, [x20], #32
+  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+    return true;
+  }
+
+  // Don't know how to handle unscaled pre/post-index versions below, so bail.
+  if (TII->isUnscaledLdSt(MI.getOpcode()))
+    return false;
+
+  // Look back to try to find a pre-index instruction. For example,
+  // add x0, x0, #8
+  // ldr x1, [x0]
+  //   merged into:
+  // ldr x1, [x0, #8]!
+  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  // The immediate in the load/store is scaled by the size of the memory
+  // operation. The immediate in the add we're looking for,
+  // however, is not, so adjust here.
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+
+  // Look forward to try to find a post-index instruction. For example,
+  // ldr x1, [x0, #64]
+  // add x0, x0, #64
+  //   merged into:
+  // ldr x1, [x0, #64]!
+  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool EnableNarrowZeroStOpt) {
   bool Modified = false;
@@ -1573,29 +1702,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        lsr w2, w1, #16
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    switch (MI.getOpcode()) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::LDRBBui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRWui:
-    case AArch64::LDRXui:
-    // Unscaled instructions.
-    case AArch64::LDURBBi:
-    case AArch64::LDURHHi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-      if (tryToPromoteLoadFromStore(MBBI)) {
-        Modified = true;
-        break;
-      }
+    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
   }
   // 2) Merge adjacent zero stores into a wider store.
   //      e.g.,
@@ -1608,17 +1718,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        str wzr, [x0, #4]
   //        ; becomes
   //        str xzr, [x0]
-  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-       EnableNarrowZeroStOpt && MBBI != E;) {
-    if (isPromotableZeroStoreInst(*MBBI)) {
-      if (tryToMergeZeroStInst(MBBI)) {
+  if (EnableNarrowZeroStOpt)
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+         MBBI != E;) {
+      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
-      } else
+      else
        ++MBBI;
-    } else
-      ++MBBI;
-  }
-
+    }
   // 3) Find loads and stores that can be merged into a single load or store
   //    pair instruction.
   //      e.g.,
@@ -1642,124 +1749,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        ldr x0, [x2], #4
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    // Do update merging. It's simpler to keep this separate from the above
-    // switchs, though not strictly necessary.
-    unsigned Opc = MI.getOpcode();
-    switch (Opc) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::STRSui:
-    case AArch64::STRDui:
-    case AArch64::STRQui:
-    case AArch64::STRXui:
-    case AArch64::STRWui:
-    case AArch64::STRHHui:
-    case AArch64::STRBBui:
-    case AArch64::LDRSui:
-    case AArch64::LDRDui:
-    case AArch64::LDRQui:
-    case AArch64::LDRXui:
-    case AArch64::LDRWui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRBBui:
-    // Unscaled instructions.
-    case AArch64::STURSi:
-    case AArch64::STURDi:
-    case AArch64::STURQi:
-    case AArch64::STURWi:
-    case AArch64::STURXi:
-    case AArch64::LDURSi:
-    case AArch64::LDURDi:
-    case AArch64::LDURQi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-    // Paired instructions.
-    case AArch64::LDPSi:
-    case AArch64::LDPSWi:
-    case AArch64::LDPDi:
-    case AArch64::LDPQi:
-    case AArch64::LDPWi:
-    case AArch64::LDPXi:
-    case AArch64::STPSi:
-    case AArch64::STPDi:
-    case AArch64::STPQi:
-    case AArch64::STPWi:
-    case AArch64::STPXi: {
-      // Make sure this is a reg+imm (as opposed to an address reloc).
-      if (!getLdStOffsetOp(MI).isImm()) {
-        ++MBBI;
-        break;
-      }
-      // Look forward to try to form a post-index instruction. For example,
-      // ldr x0, [x20]
-      // add x20, x20, #32
-      //   merged into:
-      // ldr x0, [x20], #32
-      MachineBasicBlock::iterator Update =
-          findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
-        Modified = true;
-        ++NumPostFolded;
-        break;
-      }
-
-      // Don't know how to handle unscaled pre/post-index versions below, so
-      // move to the next instruction.
-      if (TII->isUnscaledLdSt(Opc)) {
-        ++MBBI;
-        break;
-      }
-
-      // Look back to try to find a pre-index instruction. For example,
-      // add x0, x0, #8
-      // ldr x1, [x0]
-      //   merged into:
-      // ldr x1, [x0, #8]!
-      Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-      // The immediate in the load/store is scaled by the size of the memory
-      // operation. The immediate in the add we're looking for,
-      // however, is not, so adjust here.
-      int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
-
-      // Look forward to try to find a post-index instruction. For example,
-      // ldr x1, [x0, #64]
-      // add x0, x0, #64
-      //   merged into:
-      // ldr x1, [x0, #64]!
-      Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-
-      // Nothing found. Just move to the next instruction.
+    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
-    }
   }
 
   return Modified;
 }
 
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  if (skipFunction(*Fn.getFunction()))
+  if (skipFunction(Fn.getFunction()))
     return false;
 
   Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
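The recurring change in optimizeBlock above is one refactoring applied to each scan loop: the long per-opcode switch is hoisted into a predicate (isPromotableLoadFromStore, isMergeableLdStUpdate), and the loop body collapses to "if the predicate holds and the rewrite succeeds, record the change; otherwise advance the iterator". Below is a minimal, self-contained sketch of that loop shape; Instr, isMergeable() and tryToMerge() are stand-ins invented for illustration, not LLVM APIs.

```cpp
// Simplified sketch of the "predicate + merge-or-advance" loop shape used in
// optimizeBlock after this patch. Instr, isMergeable() and tryToMerge() are
// stand-ins for MachineInstr, isMergeableLdStUpdate() and
// tryToMergeLdStUpdate(); they are NOT LLVM APIs.
#include <iostream>
#include <list>

struct Instr {
  int Opcode;
};

// Cheap opcode test, analogous to isMergeableLdStUpdate().
static bool isMergeable(const Instr &I) { return I.Opcode % 2 == 0; }

// Attempt the rewrite. On success the iterator is repositioned at the next
// instruction to visit (here we simply erase the current one), mirroring how
// mergeUpdateInsn() returns the iterator the caller should continue from.
static bool tryToMerge(std::list<Instr> &Block, std::list<Instr>::iterator &It) {
  It = Block.erase(It);
  return true;
}

int main() {
  std::list<Instr> Block{{1}, {2}, {3}, {4}};
  bool Modified = false;

  for (auto MBBI = Block.begin(), E = Block.end(); MBBI != E;) {
    if (isMergeable(*MBBI) && tryToMerge(Block, MBBI))
      Modified = true;   // tryToMerge() already repositioned MBBI.
    else
      ++MBBI;            // Nothing to do here; move on.
  }

  std::cout << "modified: " << std::boolalpha << Modified
            << ", remaining: " << Block.size() << '\n';  // remaining: 2
}
```

The key property of this shape is that the rewrite helper, like mergeUpdateInsn() in the pass, is responsible for leaving the iterator at the next instruction to visit, so the loop neither skips nor revisits instructions regardless of which branch is taken.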