Diffstat (limited to 'llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp')
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 971
1 file changed, 685 insertions, 286 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index dffda5217675..bf3b6bcb5463 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -12,7 +12,9 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -60,10 +62,9 @@ public:
else if (BranchType == "indirect")
addKind(X86::AlignBranchIndirect);
else {
- report_fatal_error(
- "'-x86-align-branch 'The branches's type is combination of jcc, "
- "fused, jmp, call, ret, indirect.(plus separated)",
- false);
+ errs() << "invalid argument " << BranchType.str()
+ << " to -x86-align-branch=; each element must be one of: fused, "
+ "jcc, jmp, call, ret, indirect.(plus separated)\n";
}
}
}
@@ -85,13 +86,14 @@ cl::opt<unsigned> X86AlignBranchBoundary(
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
"x86-align-branch",
- cl::desc("Specify types of branches to align (plus separated list of "
- "types). The branches's types are combination of jcc, fused, "
- "jmp, call, ret, indirect."),
- cl::value_desc("jcc indicates conditional jumps, fused indicates fused "
- "conditional jumps, jmp indicates unconditional jumps, call "
- "indicates direct and indirect calls, ret indicates rets, "
- "indirect indicates indirect jumps."),
+ cl::desc(
+ "Specify types of branches to align (plus separated list of types):"
+ "\njcc indicates conditional jumps"
+ "\nfused indicates fused conditional jumps"
+ "\njmp indicates direct unconditional jumps"
+ "\ncall indicates direct and indirect calls"
+ "\nret indicates rets"
+ "\nindirect indicates indirect unconditional jumps"),
cl::location(X86AlignBranchKindLoc));
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
@@ -102,6 +104,18 @@ cl::opt<bool> X86AlignBranchWithin32BBoundaries(
"assumptions about labels corresponding to particular instructions, "
"and should be used with caution."));
+cl::opt<unsigned> X86PadMaxPrefixSize(
+ "x86-pad-max-prefix-size", cl::init(0),
+ cl::desc("Maximum number of prefixes to use for padding"));
+
+cl::opt<bool> X86PadForAlign(
+ "x86-pad-for-align", cl::init(true), cl::Hidden,
+ cl::desc("Pad previous instructions to implement align directives"));
+
+cl::opt<bool> X86PadForBranchAlign(
+ "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
+ cl::desc("Pad previous instructions to implement branch alignment"));
+
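
For context, these options combine on the tool command line. A typical llvm-mc invocation exercising the flags defined above (boundary and branch-type values chosen purely for illustration):

    llvm-mc -filetype=obj -triple=x86_64-unknown-linux-gnu \
        -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp \
        -x86-pad-max-prefix-size=5 foo.s -o foo.o

This requests that fused compare-and-branch pairs and (un)conditional jumps not cross a 32-byte boundary, and permits up to 5 segment-override prefixes per instruction as padding in place of NOPs.
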
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
@@ -114,14 +128,18 @@ class X86AsmBackend : public MCAsmBackend {
std::unique_ptr<const MCInstrInfo> MCII;
X86AlignBranchKind AlignBranchType;
Align AlignBoundary;
+ unsigned TargetPrefixMax = 0;
- bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
-
- bool needAlign(MCObjectStreamer &OS) const;
- bool needAlignInst(const MCInst &Inst) const;
- MCBoundaryAlignFragment *
- getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const;
MCInst PrevInst;
+ MCBoundaryAlignFragment *PendingBA = nullptr;
+ std::pair<MCFragment *, size_t> PrevInstPosition;
+ bool CanPadInst;
+
+ uint8_t determinePaddingPrefix(const MCInst &Inst) const;
+ bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
+ bool needAlign(const MCInst &Inst) const;
+ bool canPadBranches(MCObjectStreamer &OS) const;
+ bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
public:
X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
@@ -142,11 +160,14 @@ public:
AlignBoundary = assumeAligned(X86AlignBranchBoundary);
if (X86AlignBranch.getNumOccurrences())
AlignBranchType = X86AlignBranchKindLoc;
+ if (X86PadMaxPrefixSize.getNumOccurrences())
+ TargetPrefixMax = X86PadMaxPrefixSize;
}
bool allowAutoPadding() const override;
- void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
- void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
+ bool allowEnhancedRelaxation() const override;
+ void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
+ void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
unsigned getNumFixupKinds() const override {
return X86::NumTargetFixupKinds;
@@ -155,7 +176,7 @@ public:
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-
+
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
@@ -171,22 +192,34 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+
+ bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
} // end anonymous namespace
-static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) {
+static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
unsigned Op = Inst.getOpcode();
switch (Op) {
default:
return Op;
case X86::JCC_1:
- return (is16BitMode) ? X86::JCC_2 : X86::JCC_4;
+ return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
case X86::JMP_1:
- return (is16BitMode) ? X86::JMP_2 : X86::JMP_4;
+ return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
}
}
@@ -275,11 +308,11 @@ static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
}
}
-static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) {
+static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
unsigned R = getRelaxedOpcodeArith(Inst);
if (R != Inst.getOpcode())
return R;
- return getRelaxedOpcodeBranch(Inst, is16BitMode);
+ return getRelaxedOpcodeBranch(Inst, Is16BitMode);
}
static X86::CondCode getCondFromBranch(const MCInst &MI,
@@ -316,6 +349,11 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
return (BaseReg == X86::RIP);
}
+/// Check if the instruction is a prefix.
+static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
+ return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
+}
+
/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
const MCInstrInfo &MCII) {
@@ -327,6 +365,69 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst,
return FIK != X86::FirstMacroFusionInstKind::Invalid;
}
+/// X86 can reduce the number of NOP bytes needed by padding instructions with
+/// prefixes, which can improve performance in some cases. Here, we determine
+/// which prefix is the most suitable.
+///
+/// If the instruction has a segment override prefix, use the existing one.
+/// If the target is 64-bit, use the CS.
+/// If the target is 32-bit,
+/// - If the instruction has an ESP/EBP base register, use SS.
+/// - Otherwise use DS.
+uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
+ assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
+ "Prefixes can be added only in 32-bit or 64-bit mode.");
+ const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1)
+ MemoryOperand += X86II::getOperandBias(Desc);
+
+ unsigned SegmentReg = 0;
+ if (MemoryOperand >= 0) {
+ // Check for explicit segment override on memory operand.
+ SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ default:
+ break;
+ case X86II::RawFrmDstSrc: {
+ // Check segment override opcode prefix as needed (not for %ds).
+ if (Inst.getOperand(2).getReg() != X86::DS)
+ SegmentReg = Inst.getOperand(2).getReg();
+ break;
+ }
+ case X86II::RawFrmSrc: {
+ // Check segment override opcode prefix as needed (not for %ds).
+ if (Inst.getOperand(1).getReg() != X86::DS)
+ SegmentReg = Inst.getOperand(1).getReg();
+ break;
+ }
+ case X86II::RawFrmMemOffs: {
+ // Check segment override opcode prefix as needed.
+ SegmentReg = Inst.getOperand(1).getReg();
+ break;
+ }
+ }
+
+ if (SegmentReg != 0)
+ return X86::getSegmentOverridePrefixForReg(SegmentReg);
+
+ if (STI.hasFeature(X86::Mode64Bit))
+ return X86::CS_Encoding;
+
+ if (MemoryOperand >= 0) {
+ unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
+ unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
+ if (BaseReg == X86::ESP || BaseReg == X86::EBP)
+ return X86::SS_Encoding;
+ }
+ return X86::DS_Encoding;
+}
+
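
The rule above leans on redundant segment overrides being architecturally harmless. A minimal standalone sketch of the same decision, with the architectural prefix byte values written out (X86::CS_Encoding and friends are assumed to map to these bytes; this is not the LLVM API):

    #include <cstdint>

    // Hedged sketch of the padding-prefix rule. Redundant segment overrides
    // do not change semantics, which is what makes them usable as padding.
    uint8_t pickPaddingPrefix(bool Is64Bit, uint8_t ExistingSegPrefix,
                              bool BaseIsEspOrEbp) {
      if (ExistingSegPrefix)
        return ExistingSegPrefix; // reuse the instruction's own override
      if (Is64Bit)
        return 0x2E;              // CS override: ignored in 64-bit mode
      if (BaseIsEspOrEbp)
        return 0x36;              // SS override: SS is already the default
      return 0x3E;                // DS override: DS is already the default
    }
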
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
@@ -355,19 +456,122 @@ static bool hasVariantSymbol(const MCInst &MI) {
}
bool X86AsmBackend::allowAutoPadding() const {
- return (AlignBoundary != Align::None() &&
- AlignBranchType != X86::AlignBranchNone);
+ return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
+}
+
+bool X86AsmBackend::allowEnhancedRelaxation() const {
+ return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
+}
+
+/// X86 has certain instructions which enable interrupts exactly one
+/// instruction *after* the instruction which stores to SS. Return true if the
+/// given instruction has such an interrupt delay slot.
+static bool hasInterruptDelaySlot(const MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ case X86::POPSS16:
+ case X86::POPSS32:
+ case X86::STI:
+ return true;
+
+ case X86::MOV16sr:
+ case X86::MOV32sr:
+ case X86::MOV64sr:
+ case X86::MOV16sm:
+ if (Inst.getOperand(0).getReg() == X86::SS)
+ return true;
+ break;
+ }
+ return false;
+}
+
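
Why the delay slot forbids padding: the CPU executes exactly one more instruction after an SS load (or STI) before taking interrupts, so the classic stack switch relies on adjacency. A hedged sketch; the AT&T mnemonics in the comment are illustrative:

    // mov %ax, %ss   <- opens the one-instruction interrupt shadow
    // mov %bx, %sp   <- must be the very next instruction executed;
    //                   a NOP or prefix in between would let an interrupt
    //                   observe the new SS with the stale SP.
    bool canPadAfterPrev(bool PrevInstHasInterruptDelaySlot) {
      return !PrevInstHasInterruptDelaySlot; // never pad inside the shadow
    }
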
+/// Check if the instruction to be emitted is right after any data.
+static bool
+isRightAfterData(MCFragment *CurrentFragment,
+ const std::pair<MCFragment *, size_t> &PrevInstPosition) {
+ MCFragment *F = CurrentFragment;
+ // Empty data fragments may be created to prevent further data being
+ // added into the previous fragment; we need to skip them since they
+ // have no contents.
+ for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
+ if (cast<MCDataFragment>(F)->getContents().size() != 0)
+ break;
+
+ // Since data is always emitted into a DataFragment, our check strategy is
+ // simple here.
+ // - If the fragment is a DataFragment
+ // - If it's not the fragment where the previous instruction is,
+ // returns true.
+ // - If it's the fragment holding the previous instruction but its
+ // size changed since the previous instruction was emitted into
+ // it, returns true.
+ // - Otherwise returns false.
+ // - If the fragment is not a DataFragment, returns false.
+ if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
+ return DF != PrevInstPosition.first ||
+ DF->getContents().size() != PrevInstPosition.second;
+
+ return false;
+}
+
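
A standalone sketch of the same test over a toy fragment list, assuming only data fragments for brevity (the real code also returns false for non-data fragments):

    #include <cstddef>
    #include <utility>

    struct ToyDataFragment {
      const ToyDataFragment *Prev;
      size_t Size;
    };

    // Padding is unsafe when the current data fragment is not the one the
    // previous instruction ended in, or has grown since that instruction.
    bool rightAfterData(const ToyDataFragment *F,
                        std::pair<const ToyDataFragment *, size_t> PrevPos) {
      while (F && F->Size == 0) // skip empty fragments; they have no contents
        F = F->Prev;
      return F && (F != PrevPos.first || F->Size != PrevPos.second);
    }
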
+/// \returns the fragment size if it has instructions, otherwise returns 0.
+static size_t getSizeForInstFragment(const MCFragment *F) {
+ if (!F || !F->hasInstructions())
+ return 0;
+ // MCEncodedFragmentWithContents being templated makes this tricky.
+ switch (F->getKind()) {
+ default:
+ llvm_unreachable("Unknown fragment with instructions!");
+ case MCFragment::FT_Data:
+ return cast<MCDataFragment>(*F).getContents().size();
+ case MCFragment::FT_Relaxable:
+ return cast<MCRelaxableFragment>(*F).getContents().size();
+ case MCFragment::FT_CompactEncodedInst:
+ return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
+ }
+}
+
+/// Return true if we can insert a NOP or prefixes automatically before
+/// the instruction to be emitted.
+bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
+ if (hasVariantSymbol(Inst))
+ // The linker may rewrite the instruction with a variant symbol operand (e.g.
+ // TLSCALL).
+ return false;
+
+ if (hasInterruptDelaySlot(PrevInst))
+ // If this instruction follows an interrupt enabling instruction with a one
+ // instruction delay, inserting a nop would change behavior.
+ return false;
+
+ if (isPrefix(PrevInst, *MCII))
+ // If this instruction follows a prefix, inserting a nop/prefix would change
+ // semantics.
+ return false;
+
+ if (isPrefix(Inst, *MCII))
+ // If this instruction is a prefix, inserting a prefix would change
+ // semantics.
+ return false;
+
+ if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
+ // If this instruction follows any data, there is no clear
+ // instruction boundary; inserting a nop/prefix would change semantics.
+ return false;
+
+ return true;
}
-bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
+bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
if (!OS.getAllowAutoPadding())
return false;
assert(allowAutoPadding() && "incorrect initialization!");
- MCAssembler &Assembler = OS.getAssembler();
- MCSection *Sec = OS.getCurrentSectionOnly();
+ // We only pad in text sections.
+ if (!OS.getCurrentSectionOnly()->getKind().isText())
+ return false;
+
// To be Done: Currently don't deal with Bundle cases.
- if (Assembler.isBundlingEnabled() && Sec->isBundleLocked())
+ if (OS.getAssembler().isBundlingEnabled())
return false;
// Branches only need to be aligned in 32-bit or 64-bit mode.
@@ -377,59 +581,42 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
return true;
}
-/// Check if the instruction operand needs to be aligned. Padding is disabled
-/// before intruction which may be rewritten by linker(e.g. TLSCALL).
-bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
- // Linker may rewrite the instruction with variant symbol operand.
- if (hasVariantSymbol(Inst))
- return false;
-
- const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode());
- return (InstDesc.isConditionalBranch() &&
+/// Check if the instruction operand needs to be aligned.
+bool X86AsmBackend::needAlign(const MCInst &Inst) const {
+ const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
+ return (Desc.isConditionalBranch() &&
(AlignBranchType & X86::AlignBranchJcc)) ||
- (InstDesc.isUnconditionalBranch() &&
+ (Desc.isUnconditionalBranch() &&
(AlignBranchType & X86::AlignBranchJmp)) ||
- (InstDesc.isCall() &&
- (AlignBranchType & X86::AlignBranchCall)) ||
- (InstDesc.isReturn() &&
- (AlignBranchType & X86::AlignBranchRet)) ||
- (InstDesc.isIndirectBranch() &&
+ (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
+ (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
+ (Desc.isIndirectBranch() &&
(AlignBranchType & X86::AlignBranchIndirect));
}
-static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) {
- // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it.
- return !F.canEmitNops();
-}
+/// Insert BoundaryAlignFragment before instructions to align branches.
+void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
+ const MCInst &Inst) {
+ CanPadInst = canPadInst(Inst, OS);
-MCBoundaryAlignFragment *
-X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const {
- auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(OS.getCurrentFragment());
- if (!F || !canReuseBoundaryAlignFragment(*F)) {
- F = new MCBoundaryAlignFragment(AlignBoundary);
- OS.insert(F);
- }
- return F;
-}
+ if (!canPadBranches(OS))
+ return;
+
+ if (!isMacroFused(PrevInst, Inst))
+ // Macro fusion didn't actually happen; clear the pending fragment.
+ PendingBA = nullptr;
-/// Insert MCBoundaryAlignFragment before instructions to align branches.
-void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
- const MCInst &Inst) {
- if (!needAlign(OS))
+ if (!CanPadInst)
return;
- MCFragment *CF = OS.getCurrentFragment();
- bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
- if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+ if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
// Macro fusion actually happens and there is no other fragment inserted
- // after the previous instruction. NOP can be emitted in PF to align fused
- // jcc.
- if (auto *PF =
- dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) {
- const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true);
- const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true);
- }
- } else if (needAlignInst(Inst)) {
+ // after the previous instruction.
+ //
+ // Do nothing here since we already inserted a BoundaryAlign fragment when
+ // we met the first instruction in the fused pair and we'll tie them
+ // together in emitInstructionEnd.
+ //
// Note: When there is at least one fragment, such as MCAlignFragment,
// inserted after the previous instruction, e.g.
//
@@ -441,34 +628,41 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
//
// We will treat the JCC as an unfused branch although it may be fused
// with the CMP.
- auto *F = getOrCreateBoundaryAlignFragment(OS);
- F->setEmitNops(true);
- F->setFused(false);
- } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) {
- // We don't know if macro fusion happens until the reaching the next
- // instruction, so a place holder is put here if necessary.
- getOrCreateBoundaryAlignFragment(OS);
+ return;
}
- PrevInst = Inst;
+ if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
+ isFirstMacroFusibleInst(Inst, *MCII))) {
+ // If we meet an unfused branch or the first instruction in a fusible pair,
+ // insert a BoundaryAlign fragment.
+ OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+ }
}
-/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned
-/// if necessary.
-void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
- if (!needAlign(OS))
+/// Set the last fragment to be aligned for the BoundaryAlignFragment.
+void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
+ PrevInst = Inst;
+ MCFragment *CF = OS.getCurrentFragment();
+ PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
+ if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
+ F->setAllowAutoPadding(CanPadInst);
+
+ if (!canPadBranches(OS))
return;
- // If the branch is emitted into a MCRelaxableFragment, we can determine the
- // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the
- // branch is fused, the fused branch(macro fusion pair) must be emitted into
- // two fragments. Or when the branch is unfused, the branch must be emitted
- // into one fragment. The MCRelaxableFragment naturally marks the end of the
- // fused or unfused branch.
- // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of
- // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align
- // other branch.
- if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment()))
- OS.insert(new MCBoundaryAlignFragment(AlignBoundary));
+
+ if (!needAlign(Inst) || !PendingBA)
+ return;
+
+ // Tie the aligned instructions into the pending BoundaryAlign.
+ PendingBA->setLastFragment(CF);
+ PendingBA = nullptr;
+
+ // We need to ensure that further data isn't added to the current
+ // DataFragment, so that we can get the size of instructions later in
+ // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
+ // DataFragment.
+ if (isa_and_nonnull<MCDataFragment>(CF))
+ OS.insert(new MCDataFragment());
// Update the maximum alignment on the current section if necessary.
MCSection *Sec = OS.getCurrentSectionOnly();
@@ -478,13 +672,23 @@ void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
+ unsigned Type;
if (STI.getTargetTriple().getArch() == Triple::x86_64) {
- if (Name == "R_X86_64_NONE")
- return FK_NONE;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
+#undef ELF_RELOC
+ .Default(-1u);
} else {
- if (Name == "R_386_NONE")
- return FK_NONE;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/i386.def"
+#undef ELF_RELOC
+ .Default(-1u);
}
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
return MCAsmBackend::getFixupKind(Name);
}
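
For reference, each ELF_RELOC include above expands to one .Case per relocation in the .def file; schematically (first two x86_64 entries shown, with their psABI values):

    Type = llvm::StringSwitch<unsigned>(Name)
               .Case("R_X86_64_NONE", 0)
               .Case("R_X86_64_64", 1)
               // ... one .Case per entry in ELFRelocs/x86_64.def ...
               .Default(-1u);
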
@@ -502,6 +706,11 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
};
+ // Fixup kinds from the .reloc directive are like R_386_NONE/R_X86_64_NONE. They
+ // do not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -514,7 +723,7 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
const MCFixup &Fixup,
const MCValue &) {
- return Fixup.getKind() == FK_NONE;
+ return Fixup.getKind() >= FirstLiteralRelocationKind;
}
static unsigned getFixupKindSize(unsigned Kind) {
@@ -556,7 +765,10 @@ void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const {
- unsigned Size = getFixupKindSize(Fixup.getKind());
+ unsigned Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ unsigned Size = getFixupKindSize(Kind);
assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
@@ -613,12 +825,11 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void X86AsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
// The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
- bool is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
- unsigned RelaxedOp = getRelaxedOpcode(Inst, is16BitMode);
+ bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
@@ -628,8 +839,232 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst,
report_fatal_error("unexpected instruction to relax: " + OS.str());
}
- Res = Inst;
- Res.setOpcode(RelaxedOp);
+ Inst.setOpcode(RelaxedOp);
+}
+
+/// Return true if this instruction has been fully relaxed into its most
+/// general available form.
+static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
+ auto &Inst = RF.getInst();
+ auto &STI = *RF.getSubtargetInfo();
+ bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
+}
+
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ if (!RF.getAllowAutoPadding())
+ return false;
+ // If the instruction isn't fully relaxed, shifting it around might require a
+ // larger value for one of the fixups than can be encoded. The outer loop
+ // will also catch this before moving to the next instruction, but we need to
+ // prevent padding this single instruction as well.
+ if (!isFullyRelaxed(RF))
+ return false;
+
+ const unsigned OldSize = RF.getContents().size();
+ if (OldSize == 15)
+ return false;
+
+ const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
+ const unsigned RemainingPrefixSize = [&]() -> unsigned {
+ SmallString<15> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter.emitPrefix(RF.getInst(), VecOS, STI);
+ assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+
+ // TODO: It turns out we need a decent amount of plumbing for the target
+ // specific bits to determine the number of prefixes it's safe to add. Various
+ // targets (older chips mostly, but also Atom family) encounter decoder
+ // stalls with too many prefixes. For testing purposes, we set the value
+ // externally for the moment.
+ unsigned ExistingPrefixSize = Code.size();
+ if (TargetPrefixMax <= ExistingPrefixSize)
+ return 0;
+ return TargetPrefixMax - ExistingPrefixSize;
+ }();
+ const unsigned PrefixBytesToAdd =
+ std::min(MaxPossiblePad, RemainingPrefixSize);
+ if (PrefixBytesToAdd == 0)
+ return false;
+
+ const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
+
+ SmallString<256> Code;
+ Code.append(PrefixBytesToAdd, Prefix);
+ Code.append(RF.getContents().begin(), RF.getContents().end());
+ RF.getContents() = Code;
+
+ // Adjust the fixups for the change in offsets
+ for (auto &F : RF.getFixups()) {
+ F.setOffset(F.getOffset() + PrefixBytesToAdd);
+ }
+
+ RemainingSize -= PrefixBytesToAdd;
+ return true;
+}
+
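
The byte-level effect of prefix padding, as a standalone sketch (the Fixup type here is a stand-in, not the MC one): prepend N copies of the chosen prefix and shift every fixup offset by N.

    #include <cstdint>
    #include <vector>

    struct Fixup { unsigned Offset; };

    void padWithPrefixes(std::vector<uint8_t> &Code, std::vector<Fixup> &Fixups,
                         uint8_t Prefix, unsigned Count) {
      Code.insert(Code.begin(), Count, Prefix); // e.g. 0x2E CS overrides
      for (Fixup &F : Fixups)
        F.Offset += Count; // fixups move with the instruction bytes
    }
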
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ if (isFullyRelaxed(RF))
+ // TODO: There are lots of other tricks we could apply for increasing
+ // encoding size without impacting performance.
+ return false;
+
+ MCInst Relaxed = RF.getInst();
+ relaxInstruction(Relaxed, *RF.getSubtargetInfo());
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<15> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
+ const unsigned OldSize = RF.getContents().size();
+ const unsigned NewSize = Code.size();
+ assert(NewSize >= OldSize && "size decrease during relaxation?");
+ unsigned Delta = NewSize - OldSize;
+ if (Delta > RemainingSize)
+ return false;
+ RF.setInst(Relaxed);
+ RF.getContents() = Code;
+ RF.getFixups() = Fixups;
+ RemainingSize -= Delta;
+ return true;
+}
+
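
The size delta banked by padInstructionViaRelaxation comes from the longer relaxed encodings; e.g. relaxing a short jump grows it by three bytes. A hedged byte-level sketch:

    #include <cstdint>
    #include <vector>

    // "jmp rel8" is EB xx (2 bytes); the relaxed "jmp rel32" is
    // E9 xx xx xx xx (5 bytes), a delta of 3 charged against RemainingSize.
    std::vector<uint8_t> encodeJmpRel32(int32_t Disp) {
      std::vector<uint8_t> Out = {0xE9};
      for (int i = 0; i < 4; ++i) // little-endian displacement
        Out.push_back(uint8_t(Disp >> (8 * i)));
      return Out;
    }
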
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ bool Changed = false;
+ if (RemainingSize != 0)
+ Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+ if (RemainingSize != 0)
+ Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
+ return Changed;
+}
+
+void X86AsmBackend::finishLayout(MCAssembler const &Asm,
+ MCAsmLayout &Layout) const {
+ // See if we can further relax some instructions to cut down on the number of
+ // nop bytes required for code alignment. The actual win is in reducing
+ // instruction count, not number of bytes. Modern X86-64 can easily end up
+ // decode limited. It is often better to reduce the number of instructions
+ // (i.e. eliminate nops) even at the cost of increasing the size and
+ // complexity of others.
+ if (!X86PadForAlign && !X86PadForBranchAlign)
+ return;
+
+ DenseSet<MCFragment *> LabeledFragments;
+ for (const MCSymbol &S : Asm.symbols())
+ LabeledFragments.insert(S.getFragment(false));
+
+ for (MCSection &Sec : Asm) {
+ if (!Sec.getKind().isText())
+ continue;
+
+ SmallVector<MCRelaxableFragment *, 4> Relaxable;
+ for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
+ MCFragment &F = *I;
+
+ if (LabeledFragments.count(&F))
+ Relaxable.clear();
+
+ if (F.getKind() == MCFragment::FT_Data ||
+ F.getKind() == MCFragment::FT_CompactEncodedInst)
+ // Skip and ignore
+ continue;
+
+ if (F.getKind() == MCFragment::FT_Relaxable) {
+ auto &RF = cast<MCRelaxableFragment>(*I);
+ Relaxable.push_back(&RF);
+ continue;
+ }
+
+ auto canHandle = [](MCFragment &F) -> bool {
+ switch (F.getKind()) {
+ default:
+ return false;
+ case MCFragment::FT_Align:
+ return X86PadForAlign;
+ case MCFragment::FT_BoundaryAlign:
+ return X86PadForBranchAlign;
+ }
+ };
+ // For any unhandled kind, assume we can't change layout.
+ if (!canHandle(F)) {
+ Relaxable.clear();
+ continue;
+ }
+
+#ifndef NDEBUG
+ const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
+#endif
+ const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
+
+ // To keep the effects local, prefer to relax instructions closest to
+ // the align directive. This is purely about human understandability
+ // of the resulting code. If we later find a reason to expand
+ // particular instructions over others, we can adjust.
+ MCFragment *FirstChangedFragment = nullptr;
+ unsigned RemainingSize = OrigSize;
+ while (!Relaxable.empty() && RemainingSize != 0) {
+ auto &RF = *Relaxable.pop_back_val();
+ // Give the backend a chance to play any tricks it wishes to increase
+ // the encoding size of the given instruction. Target independent code
+ // will try further relaxation, but targets may play further tricks.
+ if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
+ FirstChangedFragment = &RF;
+
+ // If we have an instruction which hasn't been fully relaxed, we can't
+ // skip past it and insert bytes before it. Changing its starting
+ // offset might require a larger negative offset than it can encode.
+ // We don't need to worry about larger positive offsets as none of the
+ // possible offsets between this and our align are visible, and the
+ // ones afterwards aren't changing.
+ if (!isFullyRelaxed(RF))
+ break;
+ }
+ Relaxable.clear();
+
+ if (FirstChangedFragment) {
+ // Make sure the offsets for any fragments in the affected range get
+ // updated. Note that this (conservatively) invalidates the offsets of
+ // those following, but this is not required.
+ Layout.invalidateFragmentsFrom(FirstChangedFragment);
+ }
+
+ // BoundaryAlign explicitly tracks its size (unlike align)
+ if (F.getKind() == MCFragment::FT_BoundaryAlign)
+ cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
+
+#ifndef NDEBUG
+ const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
+ const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
+ assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
+ "can't move start of next fragment!");
+ assert(FinalSize == RemainingSize && "inconsistent size computation?");
+#endif
+
+ // If we're looking at a boundary align, make sure we don't try to pad
+ // its target instructions for some following directive. Doing so would
+ // break the alignment of the current boundary align.
+ if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
+ const MCFragment *LastFragment = BF->getLastFragment();
+ if (!LastFragment)
+ continue;
+ while (&*I != LastFragment)
+ ++I;
+ }
+ }
+ }
+
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ MCSection &Section = *Layout.getSectionOrder()[i];
+ Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
+ Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
+ }
}
/// Write a sequence of optimal nops to the output, covering \p Count
@@ -661,7 +1096,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
// This CPU doesn't support long nops. If needed add more.
// FIXME: We could generate something better than plain 0x90.
- if (!STI.getFeatureBits()[X86::FeatureNOPL]) {
+ if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
for (uint64_t i = 0; i < Count; ++i)
OS << '\x90';
return true;
@@ -670,7 +1105,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
// 15-bytes is the longest single NOP instruction, but 10-bytes is
// commonly the longest that can be efficiently decoded.
uint64_t MaxNopLength = 10;
- if (STI.getFeatureBits()[X86::ProcIntelSLM])
+ if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
MaxNopLength = 7;
else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
MaxNopLength = 15;
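
The sequences ultimately emitted are the canonical multi-byte NOPs (Intel SDM, "Recommended Multi-Byte Sequence of NOP Instruction"); the first few, for reference:

    #include <cstdint>

    static const uint8_t Nop1[] = {0x90};                   // nop
    static const uint8_t Nop2[] = {0x66, 0x90};             // 66 nop
    static const uint8_t Nop3[] = {0x0F, 0x1F, 0x00};       // nopl (%eax)
    static const uint8_t Nop4[] = {0x0F, 0x1F, 0x40, 0x00}; // nopl 0(%eax)
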
@@ -811,6 +1246,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
enum { CU_NUM_SAVED_REGS = 6 };
mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
+ Triple TT;
bool Is64Bit;
unsigned OffsetSize; ///< Offset of a "push" instruction.
@@ -838,10 +1274,140 @@ protected:
return 1;
}
+private:
+ /// Get the compact unwind number for a given register. The number
+ /// corresponds to the enum lists in compact_unwind_encoding.h.
+ int getCompactUnwindRegNum(unsigned Reg) const {
+ static const MCPhysReg CU32BitRegs[7] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const MCPhysReg CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+ }
+
+ /// Return the registers encoded for a compact encoding with a frame
+ /// pointer.
+ uint32_t encodeCompactUnwindRegistersWithFrame() const {
+ // Encode the registers in the order they were saved --- 3-bits per
+ // register. The list of saved registers is assumed to be in reverse
+ // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
+ uint32_t RegEnc = 0;
+ for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Reg == 0) break;
+
+ int CURegNum = getCompactUnwindRegNum(Reg);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for
+ // each register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+ }
+
+ assert((RegEnc & 0x3FFFF) == RegEnc &&
+ "Invalid compact register encoding!");
+ return RegEnc;
+ }
+
+ /// Create the permutation encoding used with frameless stacks. It is
+ /// passed the number of registers to be saved and an array of the registers
+ /// saved.
+ uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
+ // The saved registers are numbered from 1 to 6. In order to encode the
+ // order in which they were saved, we re-number them according to their
+ // place in the register order. The re-numbering is relative to the last
+ // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
+ // that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ for (unsigned i = 0; i < RegCount; ++i) {
+ int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
+ unsigned Countless = 0;
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+ }
+
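
A standalone transcription of the frameless encoding for the RegCount == 4 case, handy for working the {6, 2, 4, 5} example from the comment above by hand (inputs are compact-unwind register numbers, 1..6):

    #include <algorithm>
    #include <cstdint>

    uint32_t encodeFrameless4(uint32_t R0, uint32_t R1, uint32_t R2,
                              uint32_t R3) {
      uint32_t Saved[6] = {R0, R1, R2, R3, 0, 0};
      std::reverse(Saved, Saved + 6); // mirror the in-place reversal above
      uint32_t Renum[6] = {};
      for (unsigned i = 2; i < 6; ++i) { // 6 - RegCount == 2
        unsigned Smaller = 0;
        for (unsigned j = 2; j < i; ++j)
          if (Saved[j] < Saved[i])
            ++Smaller;
        Renum[i] = Saved[i] - Smaller - 1; // Lehmer-style renumbering
      }
      return 60 * Renum[2] + 12 * Renum[3] + 3 * Renum[4] + Renum[5];
    }
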
+public:
+ DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI)
+ : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
+ Is64Bit(TT.isArch64Bit()) {
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ OffsetSize = Is64Bit ? 8 : 4;
+ MoveInstrSize = Is64Bit ? 3 : 2;
+ StackDivide = Is64Bit ? 8 : 4;
+ }
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ uint32_t CPUType = cantFail(MachO::getCPUType(TT));
+ uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
+ return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
+ }
+
/// Implementation of algorithm to generate the compact unwind encoding
/// for the CFI instructions.
uint32_t
- generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const {
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
if (Instrs.empty()) return 0;
// Reset the saved registers.
@@ -904,7 +1470,7 @@ protected:
// L0:
// .cfi_def_cfa_offset 80
//
- StackSize = std::abs(Inst.getOffset()) / StackDivide;
+ StackSize = Inst.getOffset() / StackDivide;
++NumDefCFAOffsets;
break;
}
@@ -991,168 +1557,6 @@ protected:
return CompactUnwindEncoding;
}
-
-private:
- /// Get the compact unwind number for a given register. The number
- /// corresponds to the enum lists in compact_unwind_encoding.h.
- int getCompactUnwindRegNum(unsigned Reg) const {
- static const MCPhysReg CU32BitRegs[7] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const MCPhysReg CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
- for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
- if (*CURegs == Reg)
- return Idx;
-
- return -1;
- }
-
- /// Return the registers encoded for a compact encoding with a frame
- /// pointer.
- uint32_t encodeCompactUnwindRegistersWithFrame() const {
- // Encode the registers in the order they were saved --- 3-bits per
- // register. The list of saved registers is assumed to be in reverse
- // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
- uint32_t RegEnc = 0;
- for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
- unsigned Reg = SavedRegs[i];
- if (Reg == 0) break;
-
- int CURegNum = getCompactUnwindRegNum(Reg);
- if (CURegNum == -1) return ~0U;
-
- // Encode the 3-bit register number in order, skipping over 3-bits for
- // each register.
- RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
- }
-
- assert((RegEnc & 0x3FFFF) == RegEnc &&
- "Invalid compact register encoding!");
- return RegEnc;
- }
-
- /// Create the permutation encoding used with frameless stacks. It is
- /// passed the number of registers to be saved and an array of the registers
- /// saved.
- uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
- // The saved registers are numbered from 1 to 6. In order to encode the
- // order in which they were saved, we re-number them according to their
- // place in the register order. The re-numbering is relative to the last
- // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
- // that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- for (unsigned i = 0; i < RegCount; ++i) {
- int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
- if (CUReg == -1) return ~0U;
- SavedRegs[i] = CUReg;
- }
-
- // Reverse the list.
- std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
-
- uint32_t RenumRegs[CU_NUM_SAVED_REGS];
- for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
- unsigned Countless = 0;
- for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
- if (SavedRegs[j] < SavedRegs[i])
- ++Countless;
-
- RenumRegs[i] = SavedRegs[i] - Countless - 1;
- }
-
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (RegCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
- + 6 * RenumRegs[3] + 2 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
- + 3 * RenumRegs[4] + RenumRegs[5];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
- break;
- case 1:
- permutationEncoding |= RenumRegs[5];
- break;
- }
-
- assert((permutationEncoding & 0x3FF) == permutationEncoding &&
- "Invalid compact register encoding!");
- return permutationEncoding;
- }
-
-public:
- DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI, bool Is64Bit)
- : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) {
- memset(SavedRegs, 0, sizeof(SavedRegs));
- OffsetSize = Is64Bit ? 8 : 4;
- MoveInstrSize = Is64Bit ? 3 : 2;
- StackDivide = Is64Bit ? 8 : 4;
- }
-};
-
-class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
-public:
- DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : DarwinX86AsmBackend(T, MRI, STI, false) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createX86MachObjectWriter(/*Is64Bit=*/false,
- MachO::CPU_TYPE_I386,
- MachO::CPU_SUBTYPE_I386_ALL);
- }
-
- /// Generate the compact unwind encoding for the CFI instructions.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
- return generateCompactUnwindEncodingImpl(Instrs);
- }
-};
-
-class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
- const MachO::CPUSubTypeX86 Subtype;
-public:
- DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st)
- : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createX86MachObjectWriter(/*Is64Bit=*/true, MachO::CPU_TYPE_X86_64,
- Subtype);
- }
-
- /// Generate the compact unwind encoding for the CFI instructions.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
- return generateCompactUnwindEncodingImpl(Instrs);
- }
};
} // end anonymous namespace
@@ -1163,7 +1567,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
if (TheTriple.isOSBinFormatMachO())
- return new DarwinX86_32AsmBackend(T, MRI, STI);
+ return new DarwinX86AsmBackend(T, MRI, STI);
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, false, STI);
@@ -1181,13 +1585,8 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
- if (TheTriple.isOSBinFormatMachO()) {
- MachO::CPUSubTypeX86 CS =
- StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
- .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
- .Default(MachO::CPU_SUBTYPE_X86_64_ALL);
- return new DarwinX86_64AsmBackend(T, MRI, STI, CS);
- }
+ if (TheTriple.isOSBinFormatMachO())
+ return new DarwinX86AsmBackend(T, MRI, STI);
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, true, STI);