diff options
Diffstat (limited to 'llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp')
-rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 971 |
1 files changed, 685 insertions, 286 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index dffda5217675..bf3b6bcb5463 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -12,7 +12,9 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -60,10 +62,9 @@ public: else if (BranchType == "indirect") addKind(X86::AlignBranchIndirect); else { - report_fatal_error( - "'-x86-align-branch 'The branches's type is combination of jcc, " - "fused, jmp, call, ret, indirect.(plus separated)", - false); + errs() << "invalid argument " << BranchType.str() + << " to -x86-align-branch=; each element must be one of: fused, " + "jcc, jmp, call, ret, indirect.(plus separated)\n"; } } } @@ -85,13 +86,14 @@ cl::opt<unsigned> X86AlignBranchBoundary( cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( "x86-align-branch", - cl::desc("Specify types of branches to align (plus separated list of " - "types). The branches's types are combination of jcc, fused, " - "jmp, call, ret, indirect."), - cl::value_desc("jcc indicates conditional jumps, fused indicates fused " - "conditional jumps, jmp indicates unconditional jumps, call " - "indicates direct and indirect calls, ret indicates rets, " - "indirect indicates indirect jumps."), + cl::desc( + "Specify types of branches to align (plus separated list of types):" + "\njcc indicates conditional jumps" + "\nfused indicates fused conditional jumps" + "\njmp indicates direct unconditional jumps" + "\ncall indicates direct and indirect calls" + "\nret indicates rets" + "\nindirect indicates indirect unconditional jumps"), cl::location(X86AlignBranchKindLoc)); cl::opt<bool> X86AlignBranchWithin32BBoundaries( @@ -102,6 +104,18 @@ cl::opt<bool> X86AlignBranchWithin32BBoundaries( "assumptions about labels corresponding to particular instructions, " "and should be used with caution.")); +cl::opt<unsigned> X86PadMaxPrefixSize( + "x86-pad-max-prefix-size", cl::init(0), + cl::desc("Maximum number of prefixes to use for padding")); + +cl::opt<bool> X86PadForAlign( + "x86-pad-for-align", cl::init(true), cl::Hidden, + cl::desc("Pad previous instructions to implement align directives")); + +cl::opt<bool> X86PadForBranchAlign( + "x86-pad-for-branch-align", cl::init(true), cl::Hidden, + cl::desc("Pad previous instructions to implement branch alignment")); + class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine, @@ -114,14 +128,18 @@ class X86AsmBackend : public MCAsmBackend { std::unique_ptr<const MCInstrInfo> MCII; X86AlignBranchKind AlignBranchType; Align AlignBoundary; + unsigned TargetPrefixMax = 0; - bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; - - bool needAlign(MCObjectStreamer &OS) const; - bool needAlignInst(const MCInst &Inst) const; - MCBoundaryAlignFragment * - getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const; MCInst PrevInst; + MCBoundaryAlignFragment *PendingBA = nullptr; + std::pair<MCFragment *, size_t> PrevInstPosition; + bool CanPadInst; + + uint8_t determinePaddingPrefix(const MCInst &Inst) const; + bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; + bool needAlign(const MCInst &Inst) const; + bool canPadBranches(MCObjectStreamer &OS) const; + bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -142,11 +160,14 @@ public: AlignBoundary = assumeAligned(X86AlignBranchBoundary); if (X86AlignBranch.getNumOccurrences()) AlignBranchType = X86AlignBranchKindLoc; + if (X86PadMaxPrefixSize.getNumOccurrences()) + TargetPrefixMax = X86PadMaxPrefixSize; } bool allowAutoPadding() const override; - void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override; - void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override; + bool allowEnhancedRelaxation() const override; + void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override; + void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -155,7 +176,7 @@ public: Optional<MCFixupKind> getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override; @@ -171,22 +192,34 @@ public: const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override; - void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - MCInst &Res) const override; + void relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override; + + bool padInstructionViaRelaxation(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; } // end anonymous namespace -static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) { +static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) { unsigned Op = Inst.getOpcode(); switch (Op) { default: return Op; case X86::JCC_1: - return (is16BitMode) ? X86::JCC_2 : X86::JCC_4; + return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; case X86::JMP_1: - return (is16BitMode) ? X86::JMP_2 : X86::JMP_4; + return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4; } } @@ -275,11 +308,11 @@ static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { } } -static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) { +static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { unsigned R = getRelaxedOpcodeArith(Inst); if (R != Inst.getOpcode()) return R; - return getRelaxedOpcodeBranch(Inst, is16BitMode); + return getRelaxedOpcodeBranch(Inst, Is16BitMode); } static X86::CondCode getCondFromBranch(const MCInst &MI, @@ -316,6 +349,11 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { return (BaseReg == X86::RIP); } +/// Check if the instruction is a prefix. +static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { + return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); +} + /// Check if the instruction is valid as the first instruction in macro fusion. static bool isFirstMacroFusibleInst(const MCInst &Inst, const MCInstrInfo &MCII) { @@ -327,6 +365,69 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst, return FIK != X86::FirstMacroFusionInstKind::Invalid; } +/// X86 can reduce the bytes of NOP by padding instructions with prefixes to +/// get a better peformance in some cases. Here, we determine which prefix is +/// the most suitable. +/// +/// If the instruction has a segment override prefix, use the existing one. +/// If the target is 64-bit, use the CS. +/// If the target is 32-bit, +/// - If the instruction has a ESP/EBP base register, use SS. +/// - Otherwise use DS. +uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { + assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) && + "Prefixes can be added only in 32-bit or 64-bit mode."); + const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand != -1) + MemoryOperand += X86II::getOperandBias(Desc); + + unsigned SegmentReg = 0; + if (MemoryOperand >= 0) { + // Check for explicit segment override on memory operand. + SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); + } + + switch (TSFlags & X86II::FormMask) { + default: + break; + case X86II::RawFrmDstSrc: { + // Check segment override opcode prefix as needed (not for %ds). + if (Inst.getOperand(2).getReg() != X86::DS) + SegmentReg = Inst.getOperand(2).getReg(); + break; + } + case X86II::RawFrmSrc: { + // Check segment override opcode prefix as needed (not for %ds). + if (Inst.getOperand(1).getReg() != X86::DS) + SegmentReg = Inst.getOperand(1).getReg(); + break; + } + case X86II::RawFrmMemOffs: { + // Check segment override opcode prefix as needed. + SegmentReg = Inst.getOperand(1).getReg(); + break; + } + } + + if (SegmentReg != 0) + return X86::getSegmentOverridePrefixForReg(SegmentReg); + + if (STI.hasFeature(X86::Mode64Bit)) + return X86::CS_Encoding; + + if (MemoryOperand >= 0) { + unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; + unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::ESP || BaseReg == X86::EBP) + return X86::SS_Encoding; + } + return X86::DS_Encoding; +} + /// Check if the two instructions will be macro-fused on the target cpu. bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); @@ -355,19 +456,122 @@ static bool hasVariantSymbol(const MCInst &MI) { } bool X86AsmBackend::allowAutoPadding() const { - return (AlignBoundary != Align::None() && - AlignBranchType != X86::AlignBranchNone); + return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); +} + +bool X86AsmBackend::allowEnhancedRelaxation() const { + return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; +} + +/// X86 has certain instructions which enable interrupts exactly one +/// instruction *after* the instruction which stores to SS. Return true if the +/// given instruction has such an interrupt delay slot. +static bool hasInterruptDelaySlot(const MCInst &Inst) { + switch (Inst.getOpcode()) { + case X86::POPSS16: + case X86::POPSS32: + case X86::STI: + return true; + + case X86::MOV16sr: + case X86::MOV32sr: + case X86::MOV64sr: + case X86::MOV16sm: + if (Inst.getOperand(0).getReg() == X86::SS) + return true; + break; + } + return false; +} + +/// Check if the instruction to be emitted is right after any data. +static bool +isRightAfterData(MCFragment *CurrentFragment, + const std::pair<MCFragment *, size_t> &PrevInstPosition) { + MCFragment *F = CurrentFragment; + // Empty data fragments may be created to prevent further data being + // added into the previous fragment, we need to skip them since they + // have no contents. + for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) + if (cast<MCDataFragment>(F)->getContents().size() != 0) + break; + + // Since data is always emitted into a DataFragment, our check strategy is + // simple here. + // - If the fragment is a DataFragment + // - If it's not the fragment where the previous instruction is, + // returns true. + // - If it's the fragment holding the previous instruction but its + // size changed since the the previous instruction was emitted into + // it, returns true. + // - Otherwise returns false. + // - If the fragment is not a DataFragment, returns false. + if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) + return DF != PrevInstPosition.first || + DF->getContents().size() != PrevInstPosition.second; + + return false; +} + +/// \returns the fragment size if it has instructions, otherwise returns 0. +static size_t getSizeForInstFragment(const MCFragment *F) { + if (!F || !F->hasInstructions()) + return 0; + // MCEncodedFragmentWithContents being templated makes this tricky. + switch (F->getKind()) { + default: + llvm_unreachable("Unknown fragment with instructions!"); + case MCFragment::FT_Data: + return cast<MCDataFragment>(*F).getContents().size(); + case MCFragment::FT_Relaxable: + return cast<MCRelaxableFragment>(*F).getContents().size(); + case MCFragment::FT_CompactEncodedInst: + return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); + } +} + +/// Return true if we can insert NOP or prefixes automatically before the +/// the instruction to be emitted. +bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { + if (hasVariantSymbol(Inst)) + // Linker may rewrite the instruction with variant symbol operand(e.g. + // TLSCALL). + return false; + + if (hasInterruptDelaySlot(PrevInst)) + // If this instruction follows an interrupt enabling instruction with a one + // instruction delay, inserting a nop would change behavior. + return false; + + if (isPrefix(PrevInst, *MCII)) + // If this instruction follows a prefix, inserting a nop/prefix would change + // semantic. + return false; + + if (isPrefix(Inst, *MCII)) + // If this instruction is a prefix, inserting a prefix would change + // semantic. + return false; + + if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) + // If this instruction follows any data, there is no clear + // instruction boundary, inserting a nop/prefix would change semantic. + return false; + + return true; } -bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { +bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { if (!OS.getAllowAutoPadding()) return false; assert(allowAutoPadding() && "incorrect initialization!"); - MCAssembler &Assembler = OS.getAssembler(); - MCSection *Sec = OS.getCurrentSectionOnly(); + // We only pad in text section. + if (!OS.getCurrentSectionOnly()->getKind().isText()) + return false; + // To be Done: Currently don't deal with Bundle cases. - if (Assembler.isBundlingEnabled() && Sec->isBundleLocked()) + if (OS.getAssembler().isBundlingEnabled()) return false; // Branches only need to be aligned in 32-bit or 64-bit mode. @@ -377,59 +581,42 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { return true; } -/// Check if the instruction operand needs to be aligned. Padding is disabled -/// before intruction which may be rewritten by linker(e.g. TLSCALL). -bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { - // Linker may rewrite the instruction with variant symbol operand. - if (hasVariantSymbol(Inst)) - return false; - - const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode()); - return (InstDesc.isConditionalBranch() && +/// Check if the instruction operand needs to be aligned. +bool X86AsmBackend::needAlign(const MCInst &Inst) const { + const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); + return (Desc.isConditionalBranch() && (AlignBranchType & X86::AlignBranchJcc)) || - (InstDesc.isUnconditionalBranch() && + (Desc.isUnconditionalBranch() && (AlignBranchType & X86::AlignBranchJmp)) || - (InstDesc.isCall() && - (AlignBranchType & X86::AlignBranchCall)) || - (InstDesc.isReturn() && - (AlignBranchType & X86::AlignBranchRet)) || - (InstDesc.isIndirectBranch() && + (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) || + (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) || + (Desc.isIndirectBranch() && (AlignBranchType & X86::AlignBranchIndirect)); } -static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) { - // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it. - return !F.canEmitNops(); -} +/// Insert BoundaryAlignFragment before instructions to align branches. +void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, + const MCInst &Inst) { + CanPadInst = canPadInst(Inst, OS); -MCBoundaryAlignFragment * -X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const { - auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(OS.getCurrentFragment()); - if (!F || !canReuseBoundaryAlignFragment(*F)) { - F = new MCBoundaryAlignFragment(AlignBoundary); - OS.insert(F); - } - return F; -} + if (!canPadBranches(OS)) + return; + + if (!isMacroFused(PrevInst, Inst)) + // Macro fusion doesn't happen indeed, clear the pending. + PendingBA = nullptr; -/// Insert MCBoundaryAlignFragment before instructions to align branches. -void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, - const MCInst &Inst) { - if (!needAlign(OS)) + if (!CanPadInst) return; - MCFragment *CF = OS.getCurrentFragment(); - bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) { // Macro fusion actually happens and there is no other fragment inserted - // after the previous instruction. NOP can be emitted in PF to align fused - // jcc. - if (auto *PF = - dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) { - const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true); - const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true); - } - } else if (needAlignInst(Inst)) { + // after the previous instruction. + // + // Do nothing here since we already inserted a BoudaryAlign fragment when + // we met the first instruction in the fused pair and we'll tie them + // together in emitInstructionEnd. + // // Note: When there is at least one fragment, such as MCAlignFragment, // inserted after the previous instruction, e.g. // @@ -441,34 +628,41 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, // // We will treat the JCC as a unfused branch although it may be fused // with the CMP. - auto *F = getOrCreateBoundaryAlignFragment(OS); - F->setEmitNops(true); - F->setFused(false); - } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next - // instruction, so a place holder is put here if necessary. - getOrCreateBoundaryAlignFragment(OS); + return; } - PrevInst = Inst; + if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && + isFirstMacroFusibleInst(Inst, *MCII))) { + // If we meet a unfused branch or the first instuction in a fusiable pair, + // insert a BoundaryAlign fragment. + OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary)); + } } -/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned -/// if necessary. -void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { - if (!needAlign(OS)) +/// Set the last fragment to be aligned for the BoundaryAlignFragment. +void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { + PrevInst = Inst; + MCFragment *CF = OS.getCurrentFragment(); + PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); + if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) + F->setAllowAutoPadding(CanPadInst); + + if (!canPadBranches(OS)) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. - // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + + if (!needAlign(Inst) || !PendingBA) + return; + + // Tie the aligned instructions into a a pending BoundaryAlign. + PendingBA->setLastFragment(CF); + PendingBA = nullptr; + + // We need to ensure that further data isn't added to the current + // DataFragment, so that we can get the size of instructions later in + // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty + // DataFragment. + if (isa_and_nonnull<MCDataFragment>(CF)) + OS.insert(new MCDataFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); @@ -478,13 +672,23 @@ void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { if (STI.getTargetTriple().isOSBinFormatELF()) { + unsigned Type; if (STI.getTargetTriple().getArch() == Triple::x86_64) { - if (Name == "R_X86_64_NONE") - return FK_NONE; + Type = llvm::StringSwitch<unsigned>(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" +#undef ELF_RELOC + .Default(-1u); } else { - if (Name == "R_386_NONE") - return FK_NONE; + Type = llvm::StringSwitch<unsigned>(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/i386.def" +#undef ELF_RELOC + .Default(-1u); } + if (Type == -1u) + return None; + return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); } return MCAsmBackend::getFixupKind(Name); } @@ -502,6 +706,11 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, }; + // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They + // do not require any extra processing. + if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -514,7 +723,7 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, const MCFixup &Fixup, const MCValue &) { - return Fixup.getKind() == FK_NONE; + return Fixup.getKind() >= FirstLiteralRelocationKind; } static unsigned getFixupKindSize(unsigned Kind) { @@ -556,7 +765,10 @@ void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, MutableArrayRef<char> Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const { - unsigned Size = getFixupKindSize(Fixup.getKind()); + unsigned Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return; + unsigned Size = getFixupKindSize(Kind); assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); @@ -613,12 +825,11 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? -void X86AsmBackend::relaxInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, - MCInst &Res) const { +void X86AsmBackend::relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. - bool is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; - unsigned RelaxedOp = getRelaxedOpcode(Inst, is16BitMode); + bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; + unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); if (RelaxedOp == Inst.getOpcode()) { SmallString<256> Tmp; @@ -628,8 +839,232 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, report_fatal_error("unexpected instruction to relax: " + OS.str()); } - Res = Inst; - Res.setOpcode(RelaxedOp); + Inst.setOpcode(RelaxedOp); +} + +/// Return true if this instruction has been fully relaxed into it's most +/// general available form. +static bool isFullyRelaxed(const MCRelaxableFragment &RF) { + auto &Inst = RF.getInst(); + auto &STI = *RF.getSubtargetInfo(); + bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; + return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); +} + +bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + if (!RF.getAllowAutoPadding()) + return false; + // If the instruction isn't fully relaxed, shifting it around might require a + // larger value for one of the fixups then can be encoded. The outer loop + // will also catch this before moving to the next instruction, but we need to + // prevent padding this single instruction as well. + if (!isFullyRelaxed(RF)) + return false; + + const unsigned OldSize = RF.getContents().size(); + if (OldSize == 15) + return false; + + const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); + const unsigned RemainingPrefixSize = [&]() -> unsigned { + SmallString<15> Code; + raw_svector_ostream VecOS(Code); + Emitter.emitPrefix(RF.getInst(), VecOS, STI); + assert(Code.size() < 15 && "The number of prefixes must be less than 15."); + + // TODO: It turns out we need a decent amount of plumbing for the target + // specific bits to determine number of prefixes its safe to add. Various + // targets (older chips mostly, but also Atom family) encounter decoder + // stalls with too many prefixes. For testing purposes, we set the value + // externally for the moment. + unsigned ExistingPrefixSize = Code.size(); + if (TargetPrefixMax <= ExistingPrefixSize) + return 0; + return TargetPrefixMax - ExistingPrefixSize; + }(); + const unsigned PrefixBytesToAdd = + std::min(MaxPossiblePad, RemainingPrefixSize); + if (PrefixBytesToAdd == 0) + return false; + + const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); + + SmallString<256> Code; + Code.append(PrefixBytesToAdd, Prefix); + Code.append(RF.getContents().begin(), RF.getContents().end()); + RF.getContents() = Code; + + // Adjust the fixups for the change in offsets + for (auto &F : RF.getFixups()) { + F.setOffset(F.getOffset() + PrefixBytesToAdd); + } + + RemainingSize -= PrefixBytesToAdd; + return true; +} + +bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + if (isFullyRelaxed(RF)) + // TODO: There are lots of other tricks we could apply for increasing + // encoding size without impacting performance. + return false; + + MCInst Relaxed = RF.getInst(); + relaxInstruction(Relaxed, *RF.getSubtargetInfo()); + + SmallVector<MCFixup, 4> Fixups; + SmallString<15> Code; + raw_svector_ostream VecOS(Code); + Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo()); + const unsigned OldSize = RF.getContents().size(); + const unsigned NewSize = Code.size(); + assert(NewSize >= OldSize && "size decrease during relaxation?"); + unsigned Delta = NewSize - OldSize; + if (Delta > RemainingSize) + return false; + RF.setInst(Relaxed); + RF.getContents() = Code; + RF.getFixups() = Fixups; + RemainingSize -= Delta; + return true; +} + +bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + bool Changed = false; + if (RemainingSize != 0) + Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); + if (RemainingSize != 0) + Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); + return Changed; +} + +void X86AsmBackend::finishLayout(MCAssembler const &Asm, + MCAsmLayout &Layout) const { + // See if we can further relax some instructions to cut down on the number of + // nop bytes required for code alignment. The actual win is in reducing + // instruction count, not number of bytes. Modern X86-64 can easily end up + // decode limited. It is often better to reduce the number of instructions + // (i.e. eliminate nops) even at the cost of increasing the size and + // complexity of others. + if (!X86PadForAlign && !X86PadForBranchAlign) + return; + + DenseSet<MCFragment *> LabeledFragments; + for (const MCSymbol &S : Asm.symbols()) + LabeledFragments.insert(S.getFragment(false)); + + for (MCSection &Sec : Asm) { + if (!Sec.getKind().isText()) + continue; + + SmallVector<MCRelaxableFragment *, 4> Relaxable; + for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { + MCFragment &F = *I; + + if (LabeledFragments.count(&F)) + Relaxable.clear(); + + if (F.getKind() == MCFragment::FT_Data || + F.getKind() == MCFragment::FT_CompactEncodedInst) + // Skip and ignore + continue; + + if (F.getKind() == MCFragment::FT_Relaxable) { + auto &RF = cast<MCRelaxableFragment>(*I); + Relaxable.push_back(&RF); + continue; + } + + auto canHandle = [](MCFragment &F) -> bool { + switch (F.getKind()) { + default: + return false; + case MCFragment::FT_Align: + return X86PadForAlign; + case MCFragment::FT_BoundaryAlign: + return X86PadForBranchAlign; + } + }; + // For any unhandled kind, assume we can't change layout. + if (!canHandle(F)) { + Relaxable.clear(); + continue; + } + +#ifndef NDEBUG + const uint64_t OrigOffset = Layout.getFragmentOffset(&F); +#endif + const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F); + + // To keep the effects local, prefer to relax instructions closest to + // the align directive. This is purely about human understandability + // of the resulting code. If we later find a reason to expand + // particular instructions over others, we can adjust. + MCFragment *FirstChangedFragment = nullptr; + unsigned RemainingSize = OrigSize; + while (!Relaxable.empty() && RemainingSize != 0) { + auto &RF = *Relaxable.pop_back_val(); + // Give the backend a chance to play any tricks it wishes to increase + // the encoding size of the given instruction. Target independent code + // will try further relaxation, but target's may play further tricks. + if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) + FirstChangedFragment = &RF; + + // If we have an instruction which hasn't been fully relaxed, we can't + // skip past it and insert bytes before it. Changing its starting + // offset might require a larger negative offset than it can encode. + // We don't need to worry about larger positive offsets as none of the + // possible offsets between this and our align are visible, and the + // ones afterwards aren't changing. + if (!isFullyRelaxed(RF)) + break; + } + Relaxable.clear(); + + if (FirstChangedFragment) { + // Make sure the offsets for any fragments in the effected range get + // updated. Note that this (conservatively) invalidates the offsets of + // those following, but this is not required. + Layout.invalidateFragmentsFrom(FirstChangedFragment); + } + + // BoundaryAlign explicitly tracks it's size (unlike align) + if (F.getKind() == MCFragment::FT_BoundaryAlign) + cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); + +#ifndef NDEBUG + const uint64_t FinalOffset = Layout.getFragmentOffset(&F); + const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); + assert(OrigOffset + OrigSize == FinalOffset + FinalSize && + "can't move start of next fragment!"); + assert(FinalSize == RemainingSize && "inconsistent size computation?"); +#endif + + // If we're looking at a boundary align, make sure we don't try to pad + // its target instructions for some following directive. Doing so would + // break the alignment of the current boundary align. + if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { + const MCFragment *LastFragment = BF->getLastFragment(); + if (!LastFragment) + continue; + while (&*I != LastFragment) + ++I; + } + } + } + + // The layout is done. Mark every fragment as valid. + for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { + MCSection &Section = *Layout.getSectionOrder()[i]; + Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); + Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); + } } /// Write a sequence of optimal nops to the output, covering \p Count @@ -661,7 +1096,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { // This CPU doesn't support long nops. If needed add more. // FIXME: We could generated something better than plain 0x90. - if (!STI.getFeatureBits()[X86::FeatureNOPL]) { + if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) { for (uint64_t i = 0; i < Count; ++i) OS << '\x90'; return true; @@ -670,7 +1105,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { // 15-bytes is the longest single NOP instruction, but 10-bytes is // commonly the longest that can be efficiently decoded. uint64_t MaxNopLength = 10; - if (STI.getFeatureBits()[X86::ProcIntelSLM]) + if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP]) MaxNopLength = 7; else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) MaxNopLength = 15; @@ -811,6 +1246,7 @@ class DarwinX86AsmBackend : public X86AsmBackend { enum { CU_NUM_SAVED_REGS = 6 }; mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; + Triple TT; bool Is64Bit; unsigned OffsetSize; ///< Offset of a "push" instruction. @@ -838,10 +1274,140 @@ protected: return 1; } +private: + /// Get the compact unwind number for a given register. The number + /// corresponds to the enum lists in compact_unwind_encoding.h. + int getCompactUnwindRegNum(unsigned Reg) const { + static const MCPhysReg CU32BitRegs[7] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const MCPhysReg CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; + for (int Idx = 1; *CURegs; ++CURegs, ++Idx) + if (*CURegs == Reg) + return Idx; + + return -1; + } + + /// Return the registers encoded for a compact encoding with a frame + /// pointer. + uint32_t encodeCompactUnwindRegistersWithFrame() const { + // Encode the registers in the order they were saved --- 3-bits per + // register. The list of saved registers is assumed to be in reverse + // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. + uint32_t RegEnc = 0; + for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { + unsigned Reg = SavedRegs[i]; + if (Reg == 0) break; + + int CURegNum = getCompactUnwindRegNum(Reg); + if (CURegNum == -1) return ~0U; + + // Encode the 3-bit register number in order, skipping over 3-bits for + // each register. + RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); + } + + assert((RegEnc & 0x3FFFF) == RegEnc && + "Invalid compact register encoding!"); + return RegEnc; + } + + /// Create the permutation encoding used with frameless stacks. It is + /// passed the number of registers to be saved and an array of the registers + /// saved. + uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { + // The saved registers are numbered from 1 to 6. In order to encode the + // order in which they were saved, we re-number them according to their + // place in the register order. The re-numbering is relative to the last + // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in + // that order: + // + // Orig Re-Num + // ---- ------ + // 6 6 + // 2 2 + // 4 3 + // 5 3 + // + for (unsigned i = 0; i < RegCount; ++i) { + int CUReg = getCompactUnwindRegNum(SavedRegs[i]); + if (CUReg == -1) return ~0U; + SavedRegs[i] = CUReg; + } + + // Reverse the list. + std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); + + uint32_t RenumRegs[CU_NUM_SAVED_REGS]; + for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ + unsigned Countless = 0; + for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) + if (SavedRegs[j] < SavedRegs[i]) + ++Countless; + + RenumRegs[i] = SavedRegs[i] - Countless - 1; + } + + // Take the renumbered values and encode them into a 10-bit number. + uint32_t permutationEncoding = 0; + switch (RegCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] + + 6 * RenumRegs[3] + 2 * RenumRegs[4] + + RenumRegs[5]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] + + 3 * RenumRegs[4] + RenumRegs[5]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] + + RenumRegs[5]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; + break; + case 1: + permutationEncoding |= RenumRegs[5]; + break; + } + + assert((permutationEncoding & 0x3FF) == permutationEncoding && + "Invalid compact register encoding!"); + return permutationEncoding; + } + +public: + DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) + : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), + Is64Bit(TT.isArch64Bit()) { + memset(SavedRegs, 0, sizeof(SavedRegs)); + OffsetSize = Is64Bit ? 8 : 4; + MoveInstrSize = Is64Bit ? 3 : 2; + StackDivide = Is64Bit ? 8 : 4; + } + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + uint32_t CPUType = cantFail(MachO::getCPUType(TT)); + uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); + return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); + } + /// Implementation of algorithm to generate the compact unwind encoding /// for the CFI instructions. uint32_t - generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const { + generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override { if (Instrs.empty()) return 0; // Reset the saved registers. @@ -904,7 +1470,7 @@ protected: // L0: // .cfi_def_cfa_offset 80 // - StackSize = std::abs(Inst.getOffset()) / StackDivide; + StackSize = Inst.getOffset() / StackDivide; ++NumDefCFAOffsets; break; } @@ -991,168 +1557,6 @@ protected: return CompactUnwindEncoding; } - -private: - /// Get the compact unwind number for a given register. The number - /// corresponds to the enum lists in compact_unwind_encoding.h. - int getCompactUnwindRegNum(unsigned Reg) const { - static const MCPhysReg CU32BitRegs[7] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const MCPhysReg CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; - for (int Idx = 1; *CURegs; ++CURegs, ++Idx) - if (*CURegs == Reg) - return Idx; - - return -1; - } - - /// Return the registers encoded for a compact encoding with a frame - /// pointer. - uint32_t encodeCompactUnwindRegistersWithFrame() const { - // Encode the registers in the order they were saved --- 3-bits per - // register. The list of saved registers is assumed to be in reverse - // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. - uint32_t RegEnc = 0; - for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { - unsigned Reg = SavedRegs[i]; - if (Reg == 0) break; - - int CURegNum = getCompactUnwindRegNum(Reg); - if (CURegNum == -1) return ~0U; - - // Encode the 3-bit register number in order, skipping over 3-bits for - // each register. - RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); - } - - assert((RegEnc & 0x3FFFF) == RegEnc && - "Invalid compact register encoding!"); - return RegEnc; - } - - /// Create the permutation encoding used with frameless stacks. It is - /// passed the number of registers to be saved and an array of the registers - /// saved. - uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { - // The saved registers are numbered from 1 to 6. In order to encode the - // order in which they were saved, we re-number them according to their - // place in the register order. The re-numbering is relative to the last - // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in - // that order: - // - // Orig Re-Num - // ---- ------ - // 6 6 - // 2 2 - // 4 3 - // 5 3 - // - for (unsigned i = 0; i < RegCount; ++i) { - int CUReg = getCompactUnwindRegNum(SavedRegs[i]); - if (CUReg == -1) return ~0U; - SavedRegs[i] = CUReg; - } - - // Reverse the list. - std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); - - uint32_t RenumRegs[CU_NUM_SAVED_REGS]; - for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ - unsigned Countless = 0; - for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) - if (SavedRegs[j] < SavedRegs[i]) - ++Countless; - - RenumRegs[i] = SavedRegs[i] - Countless - 1; - } - - // Take the renumbered values and encode them into a 10-bit number. - uint32_t permutationEncoding = 0; - switch (RegCount) { - case 6: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 5: - permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] - + 6 * RenumRegs[3] + 2 * RenumRegs[4] - + RenumRegs[5]; - break; - case 4: - permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] - + 3 * RenumRegs[4] + RenumRegs[5]; - break; - case 3: - permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] - + RenumRegs[5]; - break; - case 2: - permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; - break; - case 1: - permutationEncoding |= RenumRegs[5]; - break; - } - - assert((permutationEncoding & 0x3FF) == permutationEncoding && - "Invalid compact register encoding!"); - return permutationEncoding; - } - -public: - DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, bool Is64Bit) - : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) { - memset(SavedRegs, 0, sizeof(SavedRegs)); - OffsetSize = Is64Bit ? 8 : 4; - MoveInstrSize = Is64Bit ? 3 : 2; - StackDivide = Is64Bit ? 8 : 4; - } -}; - -class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { -public: - DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : DarwinX86AsmBackend(T, MRI, STI, false) {} - - std::unique_ptr<MCObjectTargetWriter> - createObjectTargetWriter() const override { - return createX86MachObjectWriter(/*Is64Bit=*/false, - MachO::CPU_TYPE_I386, - MachO::CPU_SUBTYPE_I386_ALL); - } - - /// Generate the compact unwind encoding for the CFI instructions. - uint32_t generateCompactUnwindEncoding( - ArrayRef<MCCFIInstruction> Instrs) const override { - return generateCompactUnwindEncodingImpl(Instrs); - } -}; - -class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { - const MachO::CPUSubTypeX86 Subtype; -public: - DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {} - - std::unique_ptr<MCObjectTargetWriter> - createObjectTargetWriter() const override { - return createX86MachObjectWriter(/*Is64Bit=*/true, MachO::CPU_TYPE_X86_64, - Subtype); - } - - /// Generate the compact unwind encoding for the CFI instructions. - uint32_t generateCompactUnwindEncoding( - ArrayRef<MCCFIInstruction> Instrs) const override { - return generateCompactUnwindEncodingImpl(Instrs); - } }; } // end anonymous namespace @@ -1163,7 +1567,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, STI); + return new DarwinX86AsmBackend(T, MRI, STI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) return new WindowsX86AsmBackend(T, false, STI); @@ -1181,13 +1585,8 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); - if (TheTriple.isOSBinFormatMachO()) { - MachO::CPUSubTypeX86 CS = - StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName()) - .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) - .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, STI, CS); - } + if (TheTriple.isOSBinFormatMachO()) + return new DarwinX86AsmBackend(T, MRI, STI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) return new WindowsX86AsmBackend(T, true, STI); |