diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp | 371 |
1 files changed, 371 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp new file mode 100644 index 000000000000..90e758dc2e02 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -0,0 +1,371 @@ +//===-- X86FixupBWInsts.cpp - Fixup Byte or Word instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines the pass that looks through the machine instructions +/// late in the compilation, and finds byte or word instructions that +/// can be profitably replaced with 32 bit instructions that give equivalent +/// results for the bits of the results that are used. There are two possible +/// reasons to do this. +/// +/// One reason is to avoid false-dependences on the upper portions +/// of the registers. Only instructions that have a destination register +/// which is not in any of the source registers can be affected by this. +/// Any instruction where one of the source registers is also the destination +/// register is unaffected, because it has a true dependence on the source +/// register already. So, this consideration primarily affects load +/// instructions and register-to-register moves. It would +/// seem like cmov(s) would also be affected, but because of the way cmov is +/// really implemented by most machines as reading both the destination and +/// and source regsters, and then "merging" the two based on a condition, +/// it really already should be considered as having a true dependence on the +/// destination register as well. +/// +/// The other reason to do this is for potential code size savings. Word +/// operations need an extra override byte compared to their 32 bit +/// versions. So this can convert many word operations to their larger +/// size, saving a byte in encoding. This could introduce partial register +/// dependences where none existed however. As an example take: +/// orw ax, $0x1000 +/// addw ax, $3 +/// now if this were to get transformed into +/// orw ax, $1000 +/// addl eax, $3 +/// because the addl encodes shorter than the addw, this would introduce +/// a use of a register that was only partially written earlier. On older +/// Intel processors this can be quite a performance penalty, so this should +/// probably only be done when it can be proven that a new partial dependence +/// wouldn't be created, or when your know a newer processor is being +/// targeted, or when optimizing for minimum code size. +/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +#define FIXUPBW_DESC "X86 Byte/Word Instruction Fixup" +#define FIXUPBW_NAME "x86-fixup-bw-insts" + +#define DEBUG_TYPE FIXUPBW_NAME + +// Option to allow this optimization pass to have fine-grained control. +// This is turned off by default so as not to affect a large number of +// existing lit tests. +static cl::opt<bool> + FixupBWInsts("fixup-byte-word-insts", + cl::desc("Change byte and word instructions to larger sizes"), + cl::init(true), cl::Hidden); + +namespace { +class FixupBWInstPass : public MachineFunctionPass { + /// Loop over all of the instructions in the basic block replacing applicable + /// byte or word instructions with better alternatives. + void processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + + /// This sets the \p SuperDestReg to the 32 bit super reg of the original + /// destination register of the MachineInstr passed in. It returns true if + /// that super register is dead just prior to \p OrigMI, and false if not. + bool getSuperRegDestIfDead(MachineInstr *OrigMI, + unsigned &SuperDestReg) const; + + /// Change the MachineInstr \p MI into the equivalent extending load to 32 bit + /// register if it is safe to do so. Return the replacement instruction if + /// OK, otherwise return nullptr. + MachineInstr *tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const; + + /// Change the MachineInstr \p MI into the equivalent 32-bit copy if it is + /// safe to do so. Return the replacement instruction if OK, otherwise return + /// nullptr. + MachineInstr *tryReplaceCopy(MachineInstr *MI) const; + + // Change the MachineInstr \p MI into an eqivalent 32 bit instruction if + // possible. Return the replacement instruction if OK, return nullptr + // otherwise. Set WasCandidate to true or false depending on whether the + // MI was a candidate for this sort of transformation. + MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB, + bool &WasCandidate) const; +public: + static char ID; + + const char *getPassName() const override { + return FIXUPBW_DESC; + } + + FixupBWInstPass() : MachineFunctionPass(ID) { + initializeFixupBWInstPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); // Machine loop info is used to + // guide some heuristics. + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// Loop over all of the basic blocks, replacing byte and word instructions by + /// equivalent 32 bit instructions where performance or code size can be + /// improved. + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + +private: + MachineFunction *MF; + + /// Machine instruction info used throughout the class. + const X86InstrInfo *TII; + + /// Local member for function's OptForSize attribute. + bool OptForSize; + + /// Machine loop info used for guiding some heruistics. + MachineLoopInfo *MLI; + + /// Register Liveness information after the current instruction. + LivePhysRegs LiveRegs; +}; +char FixupBWInstPass::ID = 0; +} + +INITIALIZE_PASS(FixupBWInstPass, FIXUPBW_NAME, FIXUPBW_DESC, false, false) + +FunctionPass *llvm::createX86FixupBWInsts() { return new FixupBWInstPass(); } + +bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { + if (!FixupBWInsts || skipFunction(*MF.getFunction())) + return false; + + this->MF = &MF; + TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + OptForSize = MF.getFunction()->optForSize(); + MLI = &getAnalysis<MachineLoopInfo>(); + LiveRegs.init(&TII->getRegisterInfo()); + + DEBUG(dbgs() << "Start X86FixupBWInsts\n";); + + // Process all basic blocks. + for (auto &MBB : MF) + processBasicBlock(MF, MBB); + + DEBUG(dbgs() << "End X86FixupBWInsts\n";); + + return true; +} + +// TODO: This method of analysis can miss some legal cases, because the +// super-register could be live into the address expression for a memory +// reference for the instruction, and still be killed/last used by the +// instruction. However, the existing query interfaces don't seem to +// easily allow that to be checked. +// +// What we'd really like to know is whether after OrigMI, the +// only portion of SuperDestReg that is alive is the portion that +// was the destination register of OrigMI. +bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, + unsigned &SuperDestReg) const { + auto *TRI = &TII->getRegisterInfo(); + + unsigned OrigDestReg = OrigMI->getOperand(0).getReg(); + SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); + + const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg); + + // Make sure that the sub-register that this instruction has as its + // destination is the lowest order sub-register of the super-register. + // If it isn't, then the register isn't really dead even if the + // super-register is considered dead. + if (SubRegIdx == X86::sub_8bit_hi) + return false; + + if (LiveRegs.contains(SuperDestReg)) + return false; + + if (SubRegIdx == X86::sub_8bit) { + // In the case of byte registers, we also have to check that the upper + // byte register is also dead. That is considered to be independent of + // whether the super-register is dead. + unsigned UpperByteReg = + getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true); + + if (LiveRegs.contains(UpperByteReg)) + return false; + } + + return true; +} + +MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode, + MachineInstr *MI) const { + unsigned NewDestReg; + + // We are going to try to rewrite this load to a larger zero-extending + // load. This is safe if all portions of the 32 bit super-register + // of the original destination register, except for the original destination + // register are dead. getSuperRegDestIfDead checks that. + if (!getSuperRegDestIfDead(MI, NewDestReg)) + return nullptr; + + // Safe to change the instruction. + MachineInstrBuilder MIB = + BuildMI(*MF, MI->getDebugLoc(), TII->get(New32BitOpcode), NewDestReg); + + unsigned NumArgs = MI->getNumOperands(); + for (unsigned i = 1; i < NumArgs; ++i) + MIB.addOperand(MI->getOperand(i)); + + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + return MIB; +} + +MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { + assert(MI->getNumExplicitOperands() == 2); + auto &OldDest = MI->getOperand(0); + auto &OldSrc = MI->getOperand(1); + + unsigned NewDestReg; + if (!getSuperRegDestIfDead(MI, NewDestReg)) + return nullptr; + + unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32); + + // This is only correct if we access the same subregister index: otherwise, + // we could try to replace "movb %ah, %al" with "movl %eax, %eax". + auto *TRI = &TII->getRegisterInfo(); + if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) != + TRI->getSubRegIndex(NewDestReg, OldDest.getReg())) + return nullptr; + + // Safe to change the instruction. + // Don't set src flags, as we don't know if we're also killing the superreg. + // However, the superregister might not be defined; make it explicit that + // we don't care about the higher bits by reading it as Undef, and adding + // an imp-use on the original subregister. + MachineInstrBuilder MIB = + BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg) + .addReg(NewSrcReg, RegState::Undef) + .addReg(OldSrc.getReg(), RegState::Implicit); + + // Drop imp-defs/uses that would be redundant with the new def/use. + for (auto &Op : MI->implicit_operands()) + if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg)) + MIB.addOperand(Op); + + return MIB; +} + +MachineInstr *FixupBWInstPass::tryReplaceInstr( + MachineInstr *MI, MachineBasicBlock &MBB, + bool &WasCandidate) const { + MachineInstr *NewMI = nullptr; + WasCandidate = false; + + // See if this is an instruction of the type we are currently looking for. + switch (MI->getOpcode()) { + + case X86::MOV8rm: + // Only replace 8 bit loads with the zero extending versions if + // in an inner most loop and not optimizing for size. This takes + // an extra byte to encode, and provides limited performance upside. + if (MachineLoop *ML = MLI->getLoopFor(&MBB)) { + if (ML->begin() == ML->end() && !OptForSize) { + NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI); + WasCandidate = true; + } + } + break; + + case X86::MOV16rm: + // Always try to replace 16 bit load with 32 bit zero extending. + // Code size is the same, and there is sometimes a perf advantage + // from eliminating a false dependence on the upper portion of + // the register. + NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI); + WasCandidate = true; + break; + + case X86::MOV8rr: + case X86::MOV16rr: + // Always try to replace 8/16 bit copies with a 32 bit copy. + // Code size is either less (16) or equal (8), and there is sometimes a + // perf advantage from eliminating a false dependence on the upper portion + // of the register. + NewMI = tryReplaceCopy(MI); + WasCandidate = true; + break; + + default: + // nothing to do here. + break; + } + + return NewMI; +} + +void FixupBWInstPass::processBasicBlock(MachineFunction &MF, + MachineBasicBlock &MBB) { + + // This algorithm doesn't delete the instructions it is replacing + // right away. By leaving the existing instructions in place, the + // register liveness information doesn't change, and this makes the + // analysis that goes on be better than if the replaced instructions + // were immediately removed. + // + // This algorithm always creates a replacement instruction + // and notes that and the original in a data structure, until the + // whole BB has been analyzed. This keeps the replacement instructions + // from making it seem as if the larger register might be live. + SmallVector<std::pair<MachineInstr *, MachineInstr *>, 8> MIReplacements; + + // Start computing liveness for this block. We iterate from the end to be able + // to update this for each instruction. + LiveRegs.clear(); + // We run after PEI, so we need to AddPristinesAndCSRs. + LiveRegs.addLiveOuts(MBB); + + bool WasCandidate = false; + + for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) { + MachineInstr *MI = &*I; + + MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate); + + // Add this to replacements if it was a candidate, even if NewMI is + // nullptr. We will revisit that in a bit. + if (WasCandidate) { + MIReplacements.push_back(std::make_pair(MI, NewMI)); + } + + // We're done with this instruction, update liveness for the next one. + LiveRegs.stepBackward(*MI); + } + + while (!MIReplacements.empty()) { + MachineInstr *MI = MIReplacements.back().first; + MachineInstr *NewMI = MIReplacements.back().second; + MIReplacements.pop_back(); + if (NewMI) { + MBB.insert(MI, NewMI); + MBB.erase(MI); + } + } +} |