Diffstat (limited to 'lib/Target/X86')
48 files changed, 2308 insertions, 1079 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c352bfcd8cce..d45dd352fbc4 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -15,9 +15,11 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -25,17 +27,15 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { + MCSubtargetInfo &STI; MCAsmParser &Parser; - TargetMachine &TM; - -protected: - unsigned Is64Bit : 1; private: MCAsmParser &getParser() const { return Parser; } @@ -61,6 +61,11 @@ private: /// or %es:(%edi) in 32bit mode. bool isDstOp(X86Operand &Op); + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) != 0; + } + /// @name Auto-generated Matcher Functions /// { @@ -70,12 +75,11 @@ private: /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM) - : TargetAsmParser(T), Parser(parser), TM(TM) { + X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + : TargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<X86Subtarget>())); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); @@ -84,23 +88,6 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); }; - -class X86_32ATTAsmParser : public X86ATTAsmParser { -public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) - : X86ATTAsmParser(T, Parser, TM) { - Is64Bit = false; - } -}; - -class X86_64ATTAsmParser : public X86ATTAsmParser { -public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) - : X86ATTAsmParser(T, Parser, TM) { - Is64Bit = true; - } -}; - } // end anonymous namespace /// @name Auto-generated Match Functions @@ -155,7 +142,7 @@ struct X86Operand : public MCParsedAsmOperand { /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - virtual void dump(raw_ostream &OS) const {} + virtual void print(raw_ostream &OS) const {} StringRef getToken() const { assert(Kind == Token && "Invalid access!"); @@ -365,7 +352,7 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { - unsigned basereg = Is64Bit ? X86::RSI : X86::ESI; + unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; return (Op.isMem() && (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && @@ -375,7 +362,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { } bool X86ATTAsmParser::isDstOp(X86Operand &Op) { - unsigned basereg = Is64Bit ? X86::RDI : X86::EDI; + unsigned basereg = is64BitMode() ? 
X86::RDI : X86::EDI; return Op.isMem() && Op.Mem.SegReg == X86::ES && isa<MCConstantExpr>(Op.Mem.Disp) && @@ -406,7 +393,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: This should be done using Requires<In32BitMode> and // Requires<In64BitMode> so "eiz" usage in 64-bit instructions // can be also checked. - if (RegNo == X86::RIZ && !Is64Bit) + if (RegNo == X86::RIZ && !is64BitMode()) return Error(Tok.getLoc(), "riz register in 64-bit mode only"); // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. @@ -710,23 +697,6 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } - // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq - if (PatchedName.startswith("vpclmul")) { - unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( - PatchedName.slice(7, PatchedName.size() - 2)) - .Case("lqlq", 0x00) // src1[63:0], src2[63:0] - .Case("hqlq", 0x01) // src1[127:64], src2[63:0] - .Case("lqhq", 0x10) // src1[63:0], src2[127:64] - .Case("hqhq", 0x11) // src1[127:64], src2[127:64] - .Default(~0U); - if (CLMULQuadWordSelect != ~0U) { - ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, - getParser().getContext()); - assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); - PatchedName = "vpclmulqdq"; - } - } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) @@ -843,7 +813,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" if (Name.startswith("movs") && Operands.size() == 3 && (Name == "movsb" || Name == "movsw" || Name == "movsl" || - (Is64Bit && Name == "movsq"))) { + (is64BitMode() && Name == "movsq"))) { X86Operand &Op = *(X86Operand*)Operands.begin()[1]; X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; if (isSrcOp(Op) && isDstOp(Op2)) { @@ -856,7 +826,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" if (Name.startswith("lods") && Operands.size() == 3 && (Name == "lods" || Name == "lodsb" || Name == "lodsw" || - Name == "lodsl" || (Is64Bit && Name == "lodsq"))) { + Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); if (isSrcOp(*Op1) && Op2->isReg()) { @@ -886,7 +856,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" if (Name.startswith("stos") && Operands.size() == 3 && (Name == "stos" || Name == "stosb" || Name == "stosw" || - Name == "stosl" || (Is64Bit && Name == "stosq"))) { + Name == "stosl" || (is64BitMode() && Name == "stosq"))) { X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); if (isDstOp(*Op2) && Op1->isReg()) { @@ -1161,8 +1131,8 @@ extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. 
extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); + RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); + RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b5fa94f12bc7..b112f9ff69bb 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -1,18 +1,15 @@ set(LLVM_TARGET_DEFINITIONS X86.td) -tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(X86GenRegisterNames.inc -gen-register-enums) -tablegen(X86GenRegisterInfo.inc -gen-register-desc) +tablegen(X86GenRegisterInfo.inc -gen-register-info) tablegen(X86GenDisassemblerTables.inc -gen-disassembler) -tablegen(X86GenInstrNames.inc -gen-instr-enums) -tablegen(X86GenInstrInfo.inc -gen-instr-desc) +tablegen(X86GenInstrInfo.inc -gen-instr-info) tablegen(X86GenAsmWriter.inc -gen-asm-writer) tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) -tablegen(X86GenSubtarget.inc -gen-subtarget) +tablegen(X86GenSubtargetInfo.inc -gen-subtarget) tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources @@ -30,7 +27,6 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MachObjectWriter.cpp - X86MCAsmInfo.cpp X86MCCodeEmitter.cpp X86MCInstLower.cpp X86RegisterInfo.cpp @@ -60,5 +56,6 @@ add_llvm_target(X86CodeGen ${sources}) add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) add_subdirectory(TargetInfo) add_subdirectory(Utils) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index d8a105e7e9d2..4a0d2ec727a9 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -26,7 +26,8 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" -#include "X86GenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" #include "X86GenEDInfo.inc" using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 68247d2f1a5b..c37d8797b39c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,30 +15,23 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" -#include "X86Subtarget.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" #include <map> using namespace llvm; // Include the auto-generated portion of the assembly writer. #define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR -#include "X86GenRegisterNames.inc" #include "X86GenAsmWriter.inc" -#undef PRINT_ALIAS_INSTR -#undef GET_INSTRUCTION_NAME -X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) +X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - // Initialize the set of available features. 
- setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<X86Subtarget>())); } void X86ATTInstPrinter::printRegName(raw_ostream &OS, diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 5f939b61da21..5426e5cf38d9 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -19,19 +19,15 @@ namespace llvm { class MCOperand; -class X86Subtarget; -class TargetMachine; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI); + X86ATTInstPrinter(const MCAsmInfo &MAI); virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; - // Methods used to print the alias of an instruction. - unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; // Autogenerated by tblgen, returns true if we successfully printed an // alias. bool printAliasInstr(const MCInst *MI, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index c642acc3b9a2..4e28dfe7fa81 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86InstComments.h" -#include "X86GenInstrNames.inc" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" #include "../Utils/X86ShuffleDecode.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 5f581bab3906..506e26cbf7cd 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,13 +15,12 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" -#include "X86Subtarget.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" #include <cctype> using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index c8030c3ecdac..e84a1940017d 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -20,11 +20,10 @@ namespace llvm { class MCOperand; -class TargetMachine; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + X86IntelInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..ca88f8ffd08c --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMX86Desc + X86MCTargetDesc.cpp + X86MCAsmInfo.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
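For context on the InstPrinter hunks above: with the TargetMachine parameter dropped, a printer is now constructed from the MCAsmInfo alone. A minimal sketch written against the new constructor follows; the factory helper itself is illustrative, not part of this patch:

    #include "X86ATTInstPrinter.h"
    #include "llvm/MC/MCAsmInfo.h"
    using namespace llvm;

    // Hypothetical helper: build the AT&T-syntax printer without a
    // TargetMachine, per the new X86ATTInstPrinter(const MCAsmInfo &) ctor.
    static MCInstPrinter *makeATTPrinter(const MCAsmInfo &MAI) {
      return new X86ATTInstPrinter(MAI); // was X86ATTInstPrinter(TM, MAI)
    }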
diff --git a/lib/Target/X86/MCTargetDesc/Makefile b/lib/Target/X86/MCTargetDesc/Makefile new file mode 100644 index 000000000000..b19774ee379e --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/X86/MCTargetDesc/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 2e1ec6317601..27031005bd09 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "X86MCAsmInfo.h" -#include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -45,14 +44,17 @@ static const char *const x86_asm_table[] = { "{flags}", "", "{dirflag}", "", "{fpsr}", "", + "{fpcr}", "", "{cc}", "cc", 0,0}; -X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { +X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { + bool is64Bit = T.getArch() == Triple::x86_64; + if (is64Bit) + PointerSize = 8; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; - - bool is64Bit = Triple.getArch() == Triple::x86_64; TextAlignFillValue = 0x90; @@ -74,22 +76,14 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { ExceptionsType = ExceptionHandling::DwarfCFI; } -const MCExpr * -X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const { - MCContext &Context = Streamer.getContext(); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); - const MCExpr *Four = MCConstantExpr::Create(4, Context); - return MCBinaryExpr::CreateAdd(Res, Four, Context); -} - X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) : X86MCAsmInfoDarwin(Triple) { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { + if (T.getArch() == Triple::x86_64) + PointerSize = 8; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; @@ -114,6 +108,17 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { Data64bitsDirective = 0; } +const MCExpr * +X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::Create(4, Context); + return MCBinaryExpr::CreateAdd(Res, Four, Context); +} + const MCSection *X86ELFMCAsmInfo:: getNonexecutableStackSection(MCContext &Ctx) const { return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index 2cd4c8eb30ec..2cd4c8eb30ec 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp new file mode 100644 index
000000000000..b77f37b03f19 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -0,0 +1,185 @@ +//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetDesc.h" +#include "X86MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Host.h" + +#define GET_REGINFO_MC_DESC +#include "X86GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "X86GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "X86GenSubtargetInfo.inc" + +using namespace llvm; + + +std::string X86_MC::ParseX86Triple(StringRef TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::x86_64) + return "+64bit-mode"; + return "-64bit-mode"; +} + +/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the +/// specified arguments. If we can't run cpuid on the host, return true. +bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + #if defined(__GNUC__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + +void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, + unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + std::string ArchFS = X86_MC::ParseX86Triple(TT); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } + + std::string CPUName = CPU; + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) + CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } + + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeX86MCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, + X86_MC::createX86MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, + X86_MC::createX86MCSubtargetInfo); +} + +static MCInstrInfo *createX86MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitX86MCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeX86MCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); +} + +static MCRegisterInfo *createX86MCRegisterInfo() { + MCRegisterInfo *X = new MCRegisterInfo(); + InitX86MCRegisterInfo(X); + return X; +} + +extern "C" void LLVMInitializeX86MCRegInfo() { + TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); +} + + +static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { + if (TheTriple.getArch() == Triple::x86_64) + return new X86_64MCAsmInfoDarwin(TheTriple); + else + return new X86MCAsmInfoDarwin(TheTriple); + } + + if (TheTriple.isOSWindows()) + return new X86MCAsmInfoCOFF(TheTriple); + + return new X86ELFMCAsmInfo(TheTriple); +} + +extern "C" void LLVMInitializeX86MCAsmInfo() { + // Register the target asm info. + RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo); + RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo); +} diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h new file mode 100644 index 000000000000..89ea22b31be2 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -0,0 +1,60 @@ +//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef X86MCTARGETDESC_H +#define X86MCTARGETDESC_H + +#include <string> + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheX86_32Target, TheX86_64Target; + +namespace X86_MC { + std::string ParseX86Triple(StringRef TT); + + /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in + /// the specified arguments. If we can't run cpuid on the host, return true. 
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX); + + void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); + + /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance. + /// This is exposed so Asm parser, etc. do not need to go through + /// TargetRegistry. + MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +} // End llvm namespace + + +// Defines symbolic names for X86 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" + +// Defines symbolic names for the X86 instructions. +// +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "X86GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 12fb090d4dce..949661eb99e9 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -12,14 +12,13 @@ LIBRARYNAME = LLVMX86CodeGen TARGET = X86 # Make sure that tblgen is run, first thing. -BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ - X86GenRegisterInfo.inc X86GenInstrNames.inc \ - X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \ +BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \ + X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ - X86GenCallingConv.inc X86GenSubtarget.inc \ + X86GenCallingConv.inc X86GenSubtargetInfo.inc \ X86GenEDInfo.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 0ca436690040..ec52dfb3e7d1 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -15,6 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" @@ -22,10 +23,12 @@ namespace llvm { class FunctionPass; class JITCodeEmitter; +class MachineCodeEmitter; class MCCodeEmitter; class MCContext; +class MCInstrInfo; class MCObjectWriter; -class MachineCodeEmitter; +class MCSubtargetInfo; class Target; class TargetAsmBackend; class X86TargetMachine; @@ -57,10 +60,9 @@ FunctionPass *createSSEDomainFixPass(); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, - MCContext &Ctx); -MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, - MCContext &Ctx); +MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &); TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); @@ -84,17 +86,6 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); -extern Target TheX86_32Target, TheX86_64Target; - } // End llvm namespace -// Defines symbolic names for X86 registers. This defines a mapping from -// register name to register number. -// -#include "X86GenRegisterNames.inc" -// Defines symbolic names for the X86 instructions.
-// -#include "X86GenInstrNames.inc" - #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7bb96766cceb..4ccb43fe18cc 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -17,6 +17,13 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// +// X86 Subtarget state. +// + +def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", + "64-bit mode (x86_64)">; + +//===----------------------------------------------------------------------===// // X86 Subtarget features. //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 4d7d96dcb36b..9b556a55efd9 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -194,6 +194,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) { // PUSH case X86::PUSHi8: return X86::PUSHi32; + case X86::PUSHi16: return X86::PUSHi32; + case X86::PUSH64i8: return X86::PUSH64i32; + case X86::PUSH64i16: return X86::PUSH64i32; } } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index c2d53c4dd26c..99b4479a9fc9 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -709,13 +709,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, //===----------------------------------------------------------------------===// static MCInstPrinter *createX86MCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(TM, MAI); + return new X86ATTInstPrinter(MAI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(TM, MAI); + return new X86IntelInstPrinter(MAI); return 0; } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 56351756e8dd..77b99056ae00 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -44,11 +44,11 @@ def RetCC_X86Common : CallingConv<[ // can only be used by ABI non-compliant code. This vector type is only // supported while using the AVX target feature. CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>, + CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>, // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>, + CCIfType<[x86mmx], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -91,10 +91,7 @@ def RetCC_X86_64_C : CallingConv<[ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, - // MMX vector types are always returned in XMM0 except for v1i64 which is - // returned in RAX. This disagrees with ABI documentation but is bug - // compatible with gcc. - CCIfType<[v1i64], CCAssignToReg<[RAX]>>, + // MMX vector types are always returned in XMM0. CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; @@ -102,11 +99,7 @@ def RetCC_X86_64_C : CallingConv<[ // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, - - // And FP in XMM0 only. 
- CCIfType<[f32], CCAssignToReg<[XMM0]>>, - CCIfType<[f64], CCAssignToReg<[XMM0]>>, + CCIfType<[x86mmx], CCBitConvertToType<i64>>, // Otherwise, everything is the same as 'normal' X86-64 C CC. CCDelegateTo<RetCC_X86_64_C> @@ -150,17 +143,11 @@ def CC_X86_64_C : CallingConv<[ // The 'nest' parameter, if any, is passed in R10. CCIfNest<CCAssignToReg<[R10]>>, - // The first 6 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCBitConvertToType<i64>>>, - // The first 6 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>, CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, - // The first 8 MMX (except for v1i64) vector arguments are passed in XMM - // registers on Darwin. + // The first 8 MMX vector arguments are passed in XMM registers on Darwin. CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasXMMInt()", @@ -189,10 +176,7 @@ def CC_X86_64_C : CallingConv<[ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned. CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - CCAssignToStack<32, 32>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> + CCAssignToStack<32, 32>> ]>; // Calling convention used on Win64 @@ -210,7 +194,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, + CCIfType<[x86mmx], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], @@ -236,10 +220,7 @@ def CC_X86_Win64_C : CallingConv<[ // Long doubles get stack slots whose size and alignment depends on the // subtarget. - CCIfType<[f80], CCAssignToStack<0, 0>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> + CCIfType<[f80], CCAssignToStack<0, 0>> ]>; def CC_X86_64_GHC : CallingConv<[ @@ -273,8 +254,8 @@ def CC_X86_32_Common : CallingConv<[ CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>, - // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx - // registers if the call is not a vararg call. + // The first 3 __m64 vector arguments are passed in mmx registers if the + // call is not a vararg call. CCIfNotVarArg<CCIfType<[x86mmx], CCAssignToReg<[MM0, MM1, MM2]>>>, @@ -306,7 +287,7 @@ def CC_X86_32_Common : CallingConv<[ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. - CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>; + CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ // Promote i8/i16 arguments to i32. 
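To make the MMX return-value change above concrete: with the v1i64 special cases deleted, an __m64 value is typed as x86mmx and, under RetCC_X86_64_C, returned in XMM0 rather than RAX. An illustrative snippet (not from the patch) of a function this affects:

    #include <mmintrin.h>

    // Illustration only: under the modified RetCC_X86_64_C, this __m64
    // return value comes back in XMM0 (as x86mmx), not in RAX via the old
    // gcc-bug-compatible v1i64 path.
    __m64 make_pair(int hi, int lo) {
      return _mm_set_pi32(hi, lo);
    }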
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 421e221d205c..4b11db7c0331 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -68,7 +68,7 @@ namespace { return "X86 Machine Code Emitter"; } - void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc); + void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -132,7 +132,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - const TargetInstrDesc &Desc = I->getDesc(); + const MCInstrDesc &Desc = I->getDesc(); emitInstruction(*I, &Desc); // MOVPC32r is basically a call plus a pop instruction. if (Desc.getOpcode() == X86::MOVPC32r) @@ -150,7 +150,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { /// size, and 3) use of X86-64 extended registers. static unsigned determineREX(const MachineInstr &MI) { unsigned REX = 0; - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); // Pseudo instructions do not need REX prefix byte. if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) @@ -161,7 +161,7 @@ static unsigned determineREX(const MachineInstr &MI) { unsigned NumOps = Desc.getNumOperands(); if (NumOps) { bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; @@ -598,7 +598,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, template<class CodeEmitter> void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, - const TargetInstrDesc *Desc) { + const MCInstrDesc *Desc) { DEBUG(dbgs() << MI); // If this is a pseudo instruction, lower it. @@ -708,9 +708,9 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) ++CurOp; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f1b9972530c6..21e163a30054 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -15,6 +15,7 @@ #include "X86.h" #include "X86InstrBuilder.h" +#include "X86ISelLowering.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -1392,7 +1393,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { assert(DI->getAddress() && "Null address should be checked earlier!"); if (!X86SelectAddress(DI->getAddress(), AM)) return false; - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). 
@@ -1493,7 +1494,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { return false; // Fast-isel doesn't know about callee-pop yet. - if (Subtarget->IsCalleePop(isVarArg, CC)) + if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, + GuaranteedTailCallOpt)) return false; // Check whether the function can return without sret-demotion. @@ -1628,7 +1630,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { unsigned NumBytes = CCInfo.getNextStackOffset(); // Issue CALLSEQ_START - unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) .addImm(NumBytes); @@ -1801,7 +1803,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { MIB.addReg(RegArgs[i]); // Issue CALLSEQ_END - unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); unsigned NumBytesCallee = 0; if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet)) NumBytesCallee = 4; @@ -1846,16 +1848,19 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // stack, but where we prefer to use the value in xmm registers, copy it // out as F80 and use a truncate to move it from fp stack reg to xmm reg. if ((RVLocs[i].getLocReg() == X86::ST0 || - RVLocs[i].getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) { - CopyVT = MVT::f80; - CopyReg = createResultReg(X86::RFP80RegisterClass); + RVLocs[i].getLocReg() == X86::ST1)) { + if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { + CopyVT = MVT::f80; + CopyReg = createResultReg(X86::RFP80RegisterClass); + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL), + CopyReg); + } else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CopyReg).addReg(RVLocs[i].getLocReg()); + UsedRegs.push_back(RVLocs[i].getLocReg()); } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - CopyReg).addReg(RVLocs[i].getLocReg()); - UsedRegs.push_back(RVLocs[i].getLocReg()); - if (CopyVT != RVLocs[i].getValVT()) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 325d0611817d..6eed6abd43e2 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -126,10 +127,45 @@ namespace { void bundleCFG(MachineFunction &MF); MachineBasicBlock *MBB; // Current basic block + + // The hardware keeps track of how many FP registers are live, so we have + // to model that exactly. Usually, each live register corresponds to an + // FP<n> register, but when dealing with calls, returns, and inline + // assembly, it is sometimes necessary to have live scratch registers. unsigned Stack[8]; // FP<n> Registers in each stack slot... - unsigned RegMap[8]; // Track which stack slot contains each register unsigned StackTop; // The current top of the FP stack.
+ enum { + NumFPRegs = 16 // Including scratch pseudo-registers. + }; + + // For each live FP<n> register, point to its Stack[] entry. + // The first entries correspond to FP0-FP6, the rest are scratch registers + // used when we need slightly different live registers than what the + // register allocator thinks. + unsigned RegMap[NumFPRegs]; + + // Pending fixed registers - Inline assembly needs FP registers to appear + // in fixed stack slot positions. This is handled by copying FP registers + // to ST registers before the instruction, and copying back after the + // instruction. + // + // This is modeled with pending ST registers. NumPendingSTs is the number + // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP + // register that holds the ST value. The ST registers are not moved into + // place until immediately before the instruction that needs them. + // + // It can happen that we need an ST register to be live when no FP register + // holds the value: + // + // %ST0 = COPY %FP4<kill> + // + // When that happens, we allocate a scratch FP register to hold the ST + // value. That means every register in PendingST must be live. + + unsigned NumPendingSTs; + unsigned char PendingST[8]; + // Set up our stack model to match the incoming registers to MBB. void setupBlockStack(); @@ -142,13 +178,15 @@ namespace { dbgs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } + for (unsigned i = 0; i != NumPendingSTs; ++i) + dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]); dbgs() << "\n"; } /// getSlot - Return the stack slot number a particular register number is /// in. unsigned getSlot(unsigned RegNo) const { - assert(RegNo < 8 && "Regno out of range!"); + assert(RegNo < NumFPRegs && "Regno out of range!"); return RegMap[RegNo]; } @@ -160,12 +198,17 @@ namespace { /// getScratchReg - Return an FP register that is not currently in use. unsigned getScratchReg() { - for (int i = 7; i >= 0; --i) + for (int i = NumFPRegs - 1; i >= 8; --i) if (!isLive(i)) return i; llvm_unreachable("Ran out of scratch FP registers"); } + /// isScratchReg - Returns true if RegNo is a scratch FP register. + bool isScratchReg(unsigned RegNo) { + return RegNo >= 8 && RegNo < NumFPRegs; + } + /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { if (STi >= StackTop) @@ -181,7 +224,7 @@ namespace { // pushReg - Push the specified FP<n> register onto the stack. void pushReg(unsigned Reg) { - assert(Reg < 8 && "Register number out of range!"); + assert(Reg < NumFPRegs && "Register number out of range!"); if (StackTop >= 8) report_fatal_error("Stack overflow!"); Stack[StackTop] = Reg; @@ -236,7 +279,7 @@ namespace { /// Adjust the live registers to be the set in Mask. void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); - /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is + /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is /// st(0), FP reg FixStack[1] is st(1) etc. void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, MachineBasicBlock::iterator I); @@ -251,7 +294,14 @@ namespace { void handleCondMovFP(MachineBasicBlock::iterator &I); void handleSpecialFP(MachineBasicBlock::iterator &I); - bool translateCopy(MachineInstr*); + // Check if a COPY instruction is using FP registers.
+ bool isFPCopy(MachineInstr *MI) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + return X86::RFP80RegClass.contains(DstReg) || + X86::RFP80RegClass.contains(SrcReg); + } }; char FPS::ID = 0; } @@ -341,6 +391,7 @@ void FPS::bundleCFG(MachineFunction &MF) { bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; + NumPendingSTs = 0; setupBlockStack(); @@ -352,7 +403,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; - if (MI->isCopy() && translateCopy(MI)) + if (MI->isCopy() && isFPCopy(MI)) FPInstClass = X86II::SpecialFP; if (FPInstClass == X86II::NotFP) @@ -833,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { // Kill registers by popping. if (Kills && I != MBB->begin()) { MachineBasicBlock::iterator I2 = llvm::prior(I); - for (;;) { + while (StackTop) { unsigned KReg = getStackEntry(0); if (!(Kills & (1 << KReg))) break; @@ -881,7 +932,8 @@ void FPS::shuffleStackTop(const unsigned char *FixStack, continue; // (Reg st0) (OldReg st0) = (Reg OldReg st0) moveToTop(Reg, I); - moveToTop(OldReg, I); + if (FixCount > 0) + moveToTop(OldReg, I); } DEBUG(dumpStack()); } @@ -1239,141 +1291,307 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; switch (MI->getOpcode()) { default: llvm_unreachable("Unknown SpecialFP instruction!"); - case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! - case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! - case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! - assert(StackTop == 0 && "Stack should be empty after a call!"); - pushReg(getFPReg(MI->getOperand(0))); - break; - case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! - case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! - case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! - // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. - // The pattern we expect is: - // CALL - // FP1 = FpGET_ST0 - // FP4 = FpGET_ST1 - // - // At this point, we've pushed FP1 on the top of stack, so it should be - // present if it isn't dead. If it was dead, we already emitted a pop to - // remove it from the stack and StackTop = 0. - - // Push FP4 as top of stack next. - pushReg(getFPReg(MI->getOperand(0))); + case TargetOpcode::COPY: { + // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. + const MachineOperand &MO1 = MI->getOperand(1); + const MachineOperand &MO0 = MI->getOperand(0); + unsigned DstST = MO0.getReg() - X86::ST0; + unsigned SrcST = MO1.getReg() - X86::ST0; + bool KillsSrc = MI->killsRegister(MO1.getReg()); + + // ST = COPY FP. Set up a pending ST register. + if (DstST < 8) { + unsigned SrcFP = getFPReg(MO1); + assert(isLive(SrcFP) && "Cannot copy dead register"); + assert(!MO0.isDead() && "Cannot copy to dead ST register"); + + // Unallocated STs are marked as the nonexistent FP register NumFPRegs. + while (NumPendingSTs <= DstST) + PendingST[NumPendingSTs++] = NumFPRegs; + + // STi could still be live from a previous inline asm. + if (isScratchReg(PendingST[DstST])) { + DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST]) + << '\n'); + freeStackSlotBefore(MI, PendingST[DstST]); }
In this case, the ST(1) value is the only thing that is live, so - // it should be on the TOS (after the pop that was emitted) and is. Just - // continue in this case. - if (StackTop == 1) + // When the source is killed, allocate a scratch FP register. + if (KillsSrc) { + unsigned Slot = getSlot(SrcFP); + unsigned SR = getScratchReg(); + PendingST[DstST] = SR; + Stack[Slot] = SR; + RegMap[SR] = Slot; + } else + PendingST[DstST] = SrcFP; break; - - // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top - // elements so that our accounting is correct. - unsigned RegOnTop = getStackEntry(0); - unsigned RegNo = getStackEntry(1); - - // Swap the slots the regs are in. - std::swap(RegMap[RegNo], RegMap[RegOnTop]); - - // Swap stack slot contents. - if (RegMap[RegOnTop] >= StackTop) - report_fatal_error("Access past stack top!"); - std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); - break; - } - case X86::FpSET_ST0_32: - case X86::FpSET_ST0_64: - case X86::FpSET_ST0_80: { - // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm - // arguments that use an st constraint. We expect a sequence of - // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM - unsigned Op0 = getFPReg(MI->getOperand(0)); - - if (!MI->killsRegister(X86::FP0 + Op0)) { - // Duplicate Op0 into a temporary on the stack top. - duplicateToTop(Op0, getScratchReg(), I); - } else { - // Op0 is killed, so just swap it into position. - moveToTop(Op0, I); } - --StackTop; // "Forget" we have something on the top of stack! - break; - } - case X86::FpSET_ST1_32: - case X86::FpSET_ST1_64: - case X86::FpSET_ST1_80: { - // Set up st(1) for inline asm. We are assuming that st(0) has already been - // set up by FpSET_ST0, and our StackTop is off by one because of it. - unsigned Op0 = getFPReg(MI->getOperand(0)); - // Restore the actual StackTop from before Fp_SET_ST0. - // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we - // are not enforcing the constraint. - ++StackTop; - unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). - if (!MI->killsRegister(X86::FP0 + Op0)) { - duplicateToTop(Op0, getScratchReg(), I); - moveToTop(RegOnTop, I); - } else if (getSTReg(Op0) != X86::ST1) { - // We have the wrong value at st(1). Shuffle! Untested! - moveToTop(getStackEntry(1), I); - moveToTop(Op0, I); - moveToTop(RegOnTop, I); + + // FP = COPY ST. Extract fixed stack value. + // Any instruction defining ST registers must have assigned them to a + // scratch register. + if (SrcST < 8) { + unsigned DstFP = getFPReg(MO0); + assert(!isLive(DstFP) && "Cannot copy ST to live FP register"); + assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register"); + unsigned SrcFP = PendingST[SrcST]; + assert(isScratchReg(SrcFP) && "Expected ST in a scratch register"); + assert(isLive(SrcFP) && "Scratch holding ST is dead"); + + // DstFP steals the stack slot from SrcFP. + unsigned Slot = getSlot(SrcFP); + Stack[Slot] = DstFP; + RegMap[DstFP] = Slot; + + // Always treat the ST as killed. + PendingST[SrcST] = NumFPRegs; + while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) + --NumPendingSTs; + break; } - assert(StackTop >= 2 && "Too few live registers"); - StackTop -= 2; // "Forget" both st(0) and st(1). 
- break; - } - case X86::MOV_Fp3232: - case X86::MOV_Fp3264: - case X86::MOV_Fp6432: - case X86::MOV_Fp6464: - case X86::MOV_Fp3280: - case X86::MOV_Fp6480: - case X86::MOV_Fp8032: - case X86::MOV_Fp8064: - case X86::MOV_Fp8080: { - const MachineOperand &MO1 = MI->getOperand(1); - unsigned SrcReg = getFPReg(MO1); - const MachineOperand &MO0 = MI->getOperand(0); - unsigned DestReg = getFPReg(MO0); - if (MI->killsRegister(X86::FP0+SrcReg)) { + // FP <- FP copy. + unsigned DstFP = getFPReg(MO0); + unsigned SrcFP = getFPReg(MO1); + assert(isLive(SrcFP) && "Cannot copy dead register"); + if (KillsSrc) { // If the input operand is killed, we can just change the owner of the // incoming stack slot into the result. - unsigned Slot = getSlot(SrcReg); - assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); - Stack[Slot] = DestReg; - RegMap[DestReg] = Slot; - + unsigned Slot = getSlot(SrcFP); + Stack[Slot] = DstFP; + RegMap[DstFP] = Slot; } else { - // For FMOV we just duplicate the specified value to a new stack slot. + // For COPY we just duplicate the specified value to a new stack slot. // This could be made better, but would require substantial changes. - duplicateToTop(SrcReg, DestReg, I); + duplicateToTop(SrcFP, DstFP, I); } + break; + } + + case X86::FpPOP_RETVAL: { + // The FpPOP_RETVAL instruction is used after calls that return a value on + // the floating point stack. We cannot model this with ST defs since CALL + // instructions have fixed clobber lists. This instruction is interpreted + // to mean that there is one more live register on the stack than we + // thought. + // + // This means that StackTop does not match the hardware stack between a + // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions + // between CALL and FpPOP_RETVAL as long as they don't overflow the + // hardware stack. + unsigned DstFP = getFPReg(MI->getOperand(0)); + + // Move existing stack elements up to reflect reality. + assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL"); + if (StackTop) { + std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1); + for (unsigned i = 0; i != NumFPRegs; ++i) + ++RegMap[i]; } + ++StackTop; + + // DstFP is the new bottom of the stack. + Stack[0] = DstFP; + RegMap[DstFP] = 0; + + // DstFP will be killed by processBasicBlock if this was a dead def. break; + } + case TargetOpcode::INLINEASM: { // The inline asm MachineInstr currently only *uses* FP registers for the // 'f' constraint. These should be turned into the current ST(x) register - // in the machine instr. Also, any kills should be explicitly popped after - // the inline asm. - unsigned Kills = 0; + // in the machine instr. + // + // There are special rules for x87 inline assembly. The compiler must know + // exactly how many registers are popped and pushed implicitly by the asm. + // Otherwise it is not possible to restore the stack state after the inline + // asm. + // + // There are 3 kinds of input operands: + // + // 1. Popped inputs. These must appear at the stack top in ST0-STn. A + // popped input operand must be in a fixed stack slot, and it is either + // tied to an output operand, or in the clobber list. The MI has ST use + // and def operands for these inputs. + // + // 2. Fixed inputs. These inputs appear in fixed stack slots, but are + // preserved by the inline asm. The fixed stack slots must be STn-STm + // following the popped inputs. A fixed input operand cannot be tied to + // an output or appear in the clobber list. 
The MI has ST use operands + // and no defs for these inputs. + // + // 3. Preserved inputs. These inputs use the "f" constraint which is + // represented as an FP register. The inline asm won't change these + // stack slots. + // + // Outputs must be in ST registers, FP outputs are not allowed. Clobbered + // registers do not count as output operands. The inline asm changes the + // stack as if it popped all the popped inputs and then pushed all the + // output operands. + + // Scan the assembly for ST registers used, defined and clobbered. We can + // only tell clobbers from defs by looking at the asm descriptor. + unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; + unsigned NumOps = 0; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); + i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) { + unsigned Flags = MI->getOperand(i).getImm(); + NumOps = InlineAsm::getNumOperandRegisters(Flags); + if (NumOps != 1) + continue; + const MachineOperand &MO = MI->getOperand(i + 1); + if (!MO.isReg()) + continue; + unsigned STReg = MO.getReg() - X86::ST0; + if (STReg >= 8) + continue; + + switch (InlineAsm::getKind(Flags)) { + case InlineAsm::Kind_RegUse: + STUses |= (1u << STReg); + break; + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: + STDefs |= (1u << STReg); + if (MO.isDead()) + STDeadDefs |= (1u << STReg); + break; + case InlineAsm::Kind_Clobber: + STClobbers |= (1u << STReg); + break; + default: + break; + } + } + + if (STUses && !isMask_32(STUses)) + MI->emitError("fixed input regs must be last on the x87 stack"); + unsigned NumSTUses = CountTrailingOnes_32(STUses); + + // Defs must be contiguous from the stack top. ST0-STn. + if (STDefs && !isMask_32(STDefs)) { + MI->emitError("output regs must be last on the x87 stack"); + STDefs = NextPowerOf2(STDefs) - 1; + } + unsigned NumSTDefs = CountTrailingOnes_32(STDefs); + + // So must the clobbered stack slots. ST0-STm, m >= n. + if (STClobbers && !isMask_32(STDefs | STClobbers)) + MI->emitError("clobbers must be last on the x87 stack"); + + // Popped inputs are the ones that are also clobbered or defined. + unsigned STPopped = STUses & (STDefs | STClobbers); + if (STPopped && !isMask_32(STPopped)) + MI->emitError("implicitly popped regs must be last on the x87 stack"); + unsigned NumSTPopped = CountTrailingOnes_32(STPopped); + + DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " + << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n"); + + // Scan the instruction for FP uses corresponding to "f" constraints. + // Collect FP registers to kill after the instruction. + // Always kill all the scratch regs. + unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; + unsigned FPUsed = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) continue; - assert(Op.isUse() && "Only handle inline asm uses right now"); - + if (!Op.isUse()) + MI->emitError("illegal \"f\" output constraint"); unsigned FPReg = getFPReg(Op); - Op.setReg(getSTReg(FPReg)); - + FPUsed |= 1U << FPReg; + // If we kill this operand, make sure to pop it from the stack after the // asm. We just remember it for now, and pop them all off at the end in // a batch.
      if (Op.isKill())
-        Kills |= 1U << FPReg;
+        FPKills |= 1U << FPReg;
+    }
+
+    // The popped inputs will be killed by the instruction, so duplicate them
+    // if the FP register needs to be live after the instruction, or if it is
+    // used in the instruction itself. We effectively treat the popped inputs
+    // as early clobbers.
+    for (unsigned i = 0; i < NumSTPopped; ++i) {
+      if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+        continue;
+      unsigned SR = getScratchReg();
+      duplicateToTop(PendingST[i], SR, I);
+      DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+                   << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+      PendingST[i] = SR;
+    }
+
+    // Make sure we have a unique live register for every fixed use. Some of
+    // them could be undef uses, and we need to emit LD_F0 instructions.
+    for (unsigned i = 0; i < NumSTUses; ++i) {
+      if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+        // Check for shared assignments.
+        for (unsigned j = 0; j < i; ++j) {
+          if (PendingST[j] != PendingST[i])
+            continue;
+          // STi and STj are in the same register, create a copy.
+          unsigned SR = getScratchReg();
+          duplicateToTop(PendingST[i], SR, I);
+          DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+                       << unsigned(PendingST[i])
+                       << " to avoid collision with ST" << j << '\n');
+          PendingST[i] = SR;
+        }
+        continue;
+      }
+      unsigned SR = getScratchReg();
+      DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+      BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+      pushReg(SR);
+      PendingST[i] = SR;
+      if (NumPendingSTs == i)
+        ++NumPendingSTs;
+    }
+    assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+    // Now we can rearrange the live registers to match what was requested.
+    shuffleStackTop(PendingST, NumPendingSTs, I);
+    DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+    // With the stack layout fixed, rewrite the FP registers.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &Op = MI->getOperand(i);
+      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+        continue;
+      unsigned FPReg = getFPReg(Op);
+      Op.setReg(getSTReg(FPReg));
+    }
+
+    // Simulate the inline asm popping its inputs and pushing its outputs.
+    StackTop -= NumSTPopped;
+
+    // Hold the fixed output registers in scratch FP registers. They will be
+    // transferred to real FP registers by copies.
+    NumPendingSTs = 0;
+    for (unsigned i = 0; i < NumSTDefs; ++i) {
+      unsigned SR = getScratchReg();
+      pushReg(SR);
+      FPKills &= ~(1u << SR);
+    }
+    for (unsigned i = 0; i < NumSTDefs; ++i)
+      PendingST[NumPendingSTs++] = getStackEntry(i);
+    DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+    // If any of the ST defs were dead, pop them immediately. Our caller only
+    // handles dead FP defs.
+    MachineBasicBlock::iterator InsertPt = MI;
+    for (unsigned i = 0; STDefs & (1u << i); ++i) {
+      if (!(STDeadDefs & (1u << i)))
+        continue;
+      freeStackSlotAfter(InsertPt, PendingST[i]);
+      PendingST[i] = NumFPRegs;
     }
+    while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+      --NumPendingSTs;

     // If this asm kills any FP registers (is the last use of them) we must
     // explicitly emit pop instructions for them. Do this now after the asm has
@@ -1382,16 +1600,16 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
     //
     // Note: this might be a non-optimal pop sequence. We might be able to do
     // better by trying to pop in stack order or something.
- MachineBasicBlock::iterator InsertPt = MI; - while (Kills) { - unsigned FPReg = CountTrailingZeros_32(Kills); - freeStackSlotAfter(InsertPt, FPReg); - Kills &= ~(1U << FPReg); + while (FPKills) { + unsigned FPReg = CountTrailingZeros_32(FPKills); + if (isLive(FPReg)) + freeStackSlotAfter(InsertPt, FPReg); + FPKills &= ~(1U << FPReg); } // Don't delete the inline asm! return; } - + case X86::RET: case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and @@ -1489,33 +1707,3 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } else --I; } - -// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. -bool FPS::translateCopy(MachineInstr *MI) { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - - if (DstReg == X86::ST0) { - MI->setDesc(TII->get(X86::FpSET_ST0_80)); - MI->RemoveOperand(0); - return true; - } - if (DstReg == X86::ST1) { - MI->setDesc(TII->get(X86::FpSET_ST1_80)); - MI->RemoveOperand(0); - return true; - } - if (SrcReg == X86::ST0) { - MI->setDesc(TII->get(X86::FpGET_ST0_80)); - return true; - } - if (SrcReg == X86::ST1) { - MI->setDesc(TII->get(X86::FpGET_ST1_80)); - return true; - } - if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { - MI->setDesc(TII->get(X86::MOV_Fp8080)); - return true; - } - return false; -} diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 2e95300160d8..ed45a9a4c1c0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====// +//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -160,8 +161,10 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) : (Is64Bit ? X86::POP64r : X86::POP32r); - BuildMI(MBB, MBBI, DL, TII.get(Opc)) + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); + if (isSub) + MI->setFlag(MachineInstr::FrameSetup); Offset -= ThisVal; continue; } @@ -171,6 +174,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(ThisVal); + if (isSub) + MI->setFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. Offset -= ThisVal; } @@ -409,7 +414,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) - .addImm(-TailCallReturnAddrDelta); + .addImm(-TailCallReturnAddrDelta) + .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } @@ -447,7 +453,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? 
X86::PUSH64r : X86::PUSH32r))
-      .addReg(FramePtr, RegState::Kill);
+      .addReg(FramePtr, RegState::Kill)
+      .setMIFlag(MachineInstr::FrameSetup);
 
     if (needsFrameMoves) {
       // Mark the place where EBP/RBP was saved.
@@ -474,7 +481,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
     // Update EBP with the new base value...
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
             FramePtr)
-        .addReg(StackPtr);
+        .addReg(StackPtr)
+        .setMIFlag(MachineInstr::FrameSetup);
 
     if (needsFrameMoves) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -642,7 +650,7 @@
 }
 
 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
+                                    MachineBasicBlock &MBB) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
@@ -919,7 +927,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
     CalleeFrameSize += SlotSize;
-    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+      .setMIFlag(MachineInstr::FrameSetup);
   }
 
   X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
@@ -1021,3 +1030,181 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
       FrameIdx = 0;
     }
 }
+
+/// permuteEncode - Create the permutation encoding used with frameless
+/// stacks. It is passed the number of registers to be saved and an array of the
+/// registers saved.
+static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
+  // The saved registers are numbered from 1 to 6. In order to encode the order
+  // in which they were saved, we re-number them according to their place in the
+  // register order. Each register is re-numbered to the count of lower-numbered
+  // registers that are still unsaved at the point it is processed. E.g., if we
+  // have registers {6, 2, 4, 5} saved in that order:
+  //
+  //    Orig  Re-Num
+  //    ----  ------
+  //     6       5
+  //     2       1
+  //     4       2
+  //     5       2
+  //
+  bool Used[7] = { false, false, false, false, false, false, false };
+  uint32_t RenumRegs[6];
+  for (unsigned I = 0; I < SavedCount; ++I) {
+    uint32_t Renum = 0;
+    for (unsigned U = 1; U < 7; ++U) {
+      if (U == Registers[I])
+        break;
+      if (!Used[U])
+        ++Renum;
+    }
+
+    Used[Registers[I]] = true;
+    RenumRegs[I] = Renum;
+  }
+
+  // Take the renumbered values and encode them into a 10-bit number.
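// Tracing the re-numbering loop above on the {6, 2, 4, 5} example is a quick
// sanity check. A minimal standalone recomputation, using the same loop and
// the 4-register mixed-radix bases from the switch below (the inputs and the
// printf are illustrative only):

#include <cstdio>
#include <cstdint>

int main() {
  unsigned Registers[4] = {6, 2, 4, 5};
  bool Used[7] = {};
  uint32_t Renum[4];
  for (unsigned I = 0; I < 4; ++I) {
    uint32_t R = 0;
    for (unsigned U = 1; U < 7; ++U) {
      if (U == Registers[I]) break;
      if (!Used[U]) ++R;
    }
    Used[Registers[I]] = true;
    Renum[I] = R;                       // yields {5, 1, 2, 2}
  }
  // Mixed-radix pack for the 4-register case (bases 60, 12, 3, 1):
  uint32_t Enc = 60 * Renum[0] + 12 * Renum[1] + 3 * Renum[2] + Renum[3];
  std::printf("%u\n", Enc);             // prints 320, which fits in 10 bits
}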
+ uint32_t permutationEncoding = 0; + switch (SavedCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1] + + 3 * RenumRegs[2] + RenumRegs[3]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1] + + RenumRegs[2]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1]; + break; + case 1: + permutationEncoding |= RenumRegs[0]; + break; + } + + return permutationEncoding; +} + +uint32_t X86FrameLowering:: +getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, + int DataAlignmentFactor, bool IsEH) const { + uint32_t Encoding = 0; + int CFAOffset = 0; + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; + unsigned SavedRegIdx = 0; + int FramePointerReg = -1; + + for (ArrayRef<MCCFIInstruction>::const_iterator + I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + const MCCFIInstruction &Inst = *I; + MCSymbol *Label = Inst.getLabel(); + + // Ignore invalid labels. + if (Label && !Label->isDefined()) continue; + + unsigned Operation = Inst.getOperation(); + if (Operation != MCCFIInstruction::Move && + Operation != MCCFIInstruction::RelMove) + // FIXME: We can't handle this frame just yet. + return 0; + + const MachineLocation &Dst = Inst.getDestination(); + const MachineLocation &Src = Inst.getSource(); + const bool IsRelative = (Operation == MCCFIInstruction::RelMove); + + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() != MachineLocation::VirtualFP) { + // DW_CFA_def_cfa + assert(FramePointerReg == -1 &&"Defining more than one frame pointer?"); + if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP && + TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP) + // The frame pointer isn't EBP/RBP. Cannot make unwind information + // compact. + return 0; + FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH); + } // else DW_CFA_def_cfa_offset + + if (IsRelative) + CFAOffset += Src.getOffset(); + else + CFAOffset -= Src.getOffset(); + + continue; + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + // DW_CFA_def_cfa_register + assert(FramePointerReg == -1 && "Defining more than one frame pointer?"); + + if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP && + TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP) + // The frame pointer isn't EBP/RBP. Cannot make unwind information + // compact. + return 0; + + FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH); + if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg)) + return 0; + + SavedRegs[0] = 0; + SavedRegIdx = 0; + continue; + } + + unsigned Reg = Src.getReg(); + int Offset = Dst.getOffset(); + if (IsRelative) + Offset -= CFAOffset; + Offset /= DataAlignmentFactor; + + if (Offset < 0) { + // FIXME: Handle? + // DW_CFA_offset_extended_sf + return 0; + } else if (Reg < 64) { + // DW_CFA_offset + Reg + if (SavedRegIdx >= 6) return 0; + int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH); + if (CURegNum == -1) return 0; + SavedRegs[SavedRegIdx++] = CURegNum; + } else { + // FIXME: Handle? + // DW_CFA_offset_extended + return 0; + } + } + + // Bail if there are too many registers to encode. 
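// For reference, how the frameless-case fields computed in this function are
// packed into the final 32-bit compact unwind word (field positions are taken
// from the code below; the input values are made-up examples):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t StackSize   = 3;    // CFAOffset / 4, must fit in 8 bits
  uint32_t SavedCount  = 4;    // number of saved registers, at most 6
  uint32_t Permutation = 320;  // 10-bit permuteEncode() result

  uint32_t Enc = 0x02000000              // frameless unwind with small stack
               | (StackSize & 0xFF) << 16
               | (SavedCount & 0x7) << 10
               | Permutation;
  assert(Enc == 0x02031140);
}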
+ if (SavedRegIdx > 6) return 0; + + // Check if the offset is too big. + CFAOffset /= 4; + if ((CFAOffset & 0xFF) != CFAOffset) + return 0; + Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding. + + if (FramePointerReg != -1) { + Encoding |= 0x01000000; // EBP/RBP Unwind Frame + for (unsigned I = 0; I != SavedRegIdx; ++I) { + unsigned Reg = SavedRegs[I]; + if (Reg == unsigned(FramePointerReg)) continue; + Encoding |= (Reg & 0x7) << (I * 3); // Register encoding + } + } else { + Encoding |= 0x02000000; // Frameless unwind with small stack + Encoding |= (SavedRegIdx & 0x7) << 10; + Encoding |= permuteEncode(SavedRegIdx, SavedRegs); + } + + return Encoding; +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index d71108cd0586..14c31ed47cf1 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -15,6 +15,7 @@ #define X86_FRAMELOWERING_H #include "X86Subtarget.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -58,6 +59,9 @@ public: void getInitialFrameState(std::vector<MachineMove> &Moves) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + + uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, + int DataAlignmentFactor, bool IsEH) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 1fcc274e0f85..2b0f283bec75 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -23,6 +23,7 @@ #include "llvm/Intrinsics.h" #include "llvm/Support/CFG.h" #include "llvm/Type.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -191,6 +192,7 @@ namespace { SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); + bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); @@ -546,6 +548,34 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } +static bool isDispSafeForFrameIndex(int64_t Val) { + // On 64-bit platforms, we can run into an issue where a frame index + // includes a displacement that, when added to the explicit displacement, + // will overflow the displacement field. Assuming that the frame index + // displacement fits into a 31-bit integer (which is only slightly more + // aggressive than the current fundamental assumption that it fits into + // a 32-bit integer), a 31-bit disp should always be safe. + return isInt<31>(Val); +} + +bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, + X86ISelAddressMode &AM) { + int64_t Val = AM.Disp + Offset; + CodeModel::Model M = TM.getCodeModel(); + if (Subtarget->is64Bit()) { + if (!X86::isOffsetSuitableForCodeModel(Val, M, + AM.hasSymbolicDisplacement())) + return true; + // In addition to the checks required for a register base, check that + // we do not try to use an unsafe Disp with a frame index. 
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && + !isDispSafeForFrameIndex(Val)) + return true; + } + AM.Disp = Val; + return false; + +} bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); @@ -595,18 +625,22 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // must allow RIP. !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { - int64_t Offset = AM.Disp + G->getOffset(); - if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; + X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); - AM.Disp = Offset; AM.SymbolFlags = G->getTargetFlags(); + if (FoldOffsetIntoAddress(G->getOffset(), AM)) { + AM = Backup; + return true; + } } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { - int64_t Offset = AM.Disp + CP->getOffset(); - if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; + X86ISelAddressMode Backup = AM; AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); - AM.Disp = Offset; AM.SymbolFlags = CP->getTargetFlags(); + if (FoldOffsetIntoAddress(CP->getOffset(), AM)) { + AM = Backup; + return true; + } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); @@ -688,7 +722,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { - bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ dbgs() << "MatchAddress: "; @@ -698,8 +731,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (Depth > 5) return MatchAddressBase(N, AM); - CodeModel::Model M = TM.getCodeModel(); - // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. // RIP relative addressing: %rip + 32-bit displacement! @@ -709,14 +740,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // consistency. 
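// Boundary behavior of the isInt<31> guard in isDispSafeForFrameIndex above.
// A standalone reimplementation for illustration; isInt<N> simply checks the
// signed N-bit range:

#include <cassert>
#include <cstdint>

static bool isInt31(int64_t V) {
  return V >= -(INT64_C(1) << 30) && V < (INT64_C(1) << 30);
}

int main() {
  assert(isInt31((INT64_C(1) << 30) - 1)); // largest displacement accepted
  assert(!isInt31(INT64_C(1) << 30));      // one past the limit: rejected,
                                           // so the fold is refused instead
  assert(isInt31(-(INT64_C(1) << 30)));    // most negative accepted value
}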
if (!AM.ES && AM.JT != -1) return true; - if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { - int64_t Val = AM.Disp + Cst->getSExtValue(); - if (X86::isOffsetSuitableForCodeModel(Val, M, - AM.hasSymbolicDisplacement())) { - AM.Disp = Val; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) + if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM)) return false; - } - } return true; } @@ -724,12 +750,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, default: break; case ISD::Constant: { uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, - AM.hasSymbolicDisplacement())) { - AM.Disp += Val; + if (!FoldOffsetIntoAddress(Val, AM)) return false; - } break; } @@ -745,8 +767,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::FrameIndex: - if (AM.BaseType == X86ISelAddressMode::RegBase - && AM.Base_Reg.getNode() == 0) { + if (AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() == 0 && + (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); return false; @@ -775,16 +798,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); - uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(Disp, M, - AM.hasSymbolicDisplacement())) - AM.Disp = Disp; - else - AM.IndexReg = ShVal; - } else { - AM.IndexReg = ShVal; + uint64_t Disp = AddVal->getSExtValue() << Val; + if (!FoldOffsetIntoAddress(Disp, AM)) + return false; } + + AM.IndexReg = ShVal; return false; } break; @@ -818,13 +837,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Reg = MulVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); - uint64_t Disp = AM.Disp + AddVal->getSExtValue() * - CN->getZExtValue(); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(Disp, M, - AM.hasSymbolicDisplacement())) - AM.Disp = Disp; - else + uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); + if (FoldOffsetIntoAddress(Disp, AM)) Reg = N.getNode()->getOperand(0); } else { Reg = N.getNode()->getOperand(0); @@ -949,19 +963,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (CurDAG->isBaseWithConstantOffset(N)) { X86ISelAddressMode Backup = AM; ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); - uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && - // Address could not have picked a GV address for the displacement. - AM.GV == NULL && - // On x86-64, the resultant disp must fit in 32-bits. 
- (!is64Bit || - X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement()))) { - AM.Disp += Offset; + !FoldOffsetIntoAddress(CN->getSExtValue(), AM)) return false; - } AM = Backup; } break; @@ -1351,7 +1357,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { bool isInc = false, isDec = false, isSub = false, isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN) { + if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) { isCN = true; int64_t CNVal = CN->getSExtValue(); if (CNVal == 1) @@ -1371,6 +1377,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Val = Val.getOperand(1); } + DebugLoc dl = Node->getDebugLoc(); unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { default: return 0; @@ -1462,7 +1469,6 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { break; } - DebugLoc dl = Node->getDebugLoc(); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -1579,7 +1585,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { bool isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN) { + if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { isCN = true; Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); } @@ -1612,16 +1618,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Opc = AtomicOpcTbl[Op][I32]; break; case MVT::i64: + Opc = AtomicOpcTbl[Op][I64]; if (isCN) { if (immSext8(Val.getNode())) Opc = AtomicOpcTbl[Op][SextConstantI64]; else if (i64immSExt32(Val.getNode())) Opc = AtomicOpcTbl[Op][ConstantI64]; - } else - Opc = AtomicOpcTbl[Op][I64]; + } break; } + assert(Opc != 0 && "Invalid arith lock transform!"); + DebugLoc dl = Node->getDebugLoc(); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 294a6a74cc77..5096d9ae2edf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Setup Windows compiler runtime calls. setLibcallName(RTLIB::SDIV_I64, "_alldiv"); setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); + setLibcallName(RTLIB::SREM_I64, "_allrem"); + setLibcallName(RTLIB::UREM_I64, "_aullrem"); + setLibcallName(RTLIB::MUL_I64, "_allmul"); setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); } @@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS } + // We don't support FMA. + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + // Long double always uses X87. 
if (!UseSoftFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); @@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80 , Expand); } + + setOperationAction(ISD::FMA, MVT::f80, Expand); } // Always use a library call for pow. @@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); - setOperationAction(ISD::LOAD, MVT::v8i32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); setOperationAction(ISD::LOAD, MVT::v4i64, Legal); @@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); - // Custom lower build_vector, vector_shuffle, scalar_to_vector, - // insert_vector_elt extract_subvector and extract_vector_elt for - // 256-bit types. - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-256-bit vectors - if (!isPowerOf2_32(MVT(VT).getVectorNumElements()) - || (MVT(VT).getSizeInBits() < 256)) - continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - } - // Custom-lower insert_subvector and extract_subvector based on - // the result type. + // Custom lower several nodes for 256-bit types. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-256-bit vectors - if (!isPowerOf2_32(MVT(VT).getVectorNumElements())) + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; + EVT VT = SVT; + + // Extract subvector is special because the value type + // (result) is 128-bit but the source is 256-bit wide. + if (VT.is128BitVector()) + setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + + // Do not attempt to custom lower other non-256-bit vectors + if (!VT.is256BitVector()) continue; - if (MVT(VT).getSizeInBits() == 128) { - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - } - else if (MVT(VT).getSizeInBits() == 256) { - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - } + setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. - // Don't promote loads because we need them for VPERM vector index versions. 
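// What the "Promote ... to v4i64" actions below amount to for a bitwise op,
// conceptually (a sketch, not the legalizer's literal output). Bitwise ops are
// lane-agnostic, so reinterpreting the 256 bits is safe:
//
//   (v32i8 (and x, y))
//     --> (v32i8 (bitcast (and (v4i64 (bitcast x)), (v4i64 (bitcast y)))))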
+    for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+      EVT VT = SVT;
-    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         VT++) {
-      if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
-          || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
+      // Do not attempt to promote non-256-bit vectors
+      if (!VT.is256BitVector())
         continue;
-      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v4i64);
-      //setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
-      //AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
+
+      setOperationAction(ISD::AND,    SVT, Promote);
+      AddPromotedToType (ISD::AND,    SVT, MVT::v4i64);
+      setOperationAction(ISD::OR,     SVT, Promote);
+      AddPromotedToType (ISD::OR,     SVT, MVT::v4i64);
+      setOperationAction(ISD::XOR,    SVT, Promote);
+      AddPromotedToType (ISD::XOR,    SVT, MVT::v4i64);
+      setOperationAction(ISD::LOAD,   SVT, Promote);
+      AddPromotedToType (ISD::LOAD,   SVT, MVT::v4i64);
+      setOperationAction(ISD::SELECT, SVT, Promote);
+      AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
     }
   }
 
+  // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+  // of this type with custom code.
+  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -1511,20 +1517,15 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   // If this is a call to a function that returns an fp value on the floating
   // point stack, we must guarantee that the value is popped from the stack, so
   // a CopyFromReg is not good enough - the copy instruction may be eliminated
-  // if the return value is not used. We use the FpGET_ST0 instructions
+  // if the return value is not used. We use the FpPOP_RETVAL instruction
   // instead.
   if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
     // If we prefer to use the value in xmm registers, copy it out as f80 and
     // use a truncate to move it from fp stack reg to xmm reg.
     if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
-    bool isST0 = VA.getLocReg() == X86::ST0;
-    unsigned Opc = 0;
-    if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
-    if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
-    if (CopyVT == MVT::f80) Opc = isST0 ?
X86::FpGET_ST0_80:X86::FpGET_ST1_80;
     SDValue Ops[] = { Chain, InFlag };
-    Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
-                                       Ops, 2), 1);
+    Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+                                       MVT::Other, MVT::Glue, Ops, 2), 1);
     Val = Chain.getValue(0);
 
     // Round the f80 to the right size, which also moves it to the appropriate
@@ -1898,7 +1899,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   }
 
   // Some CCs need callee pop.
-  if (Subtarget->IsCalleePop(isVarArg, CallConv)) {
+  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
   } else {
     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -2271,6 +2272,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     const GlobalValue *GV = G->getGlobal();
     if (!GV->hasDLLImportLinkage()) {
       unsigned char OpFlags = 0;
+      bool ExtraLoad = false;
+      unsigned WrapperKind = ISD::DELETED_NODE;
 
       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
       // external symbols must go through the PLT in PIC mode. If the symbol
@@ -2288,10 +2291,28 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         // unless we're building with the leopard linker or later, which
         // automatically synthesizes these stubs.
         OpFlags = X86II::MO_DARWIN_STUB;
+      } else if (Subtarget->isPICStyleRIPRel() &&
+                 isa<Function>(GV) &&
+                 cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+        // If the function is marked as non-lazy, generate an indirect call
+        // which loads from the GOT directly. This avoids runtime overhead
+        // at the cost of eager binding (and one extra byte of encoding).
+        OpFlags = X86II::MO_GOTPCREL;
+        WrapperKind = X86ISD::WrapperRIP;
+        ExtraLoad = true;
       }
 
       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
                                           G->getOffset(), OpFlags);
+
+      // Add a wrapper if needed.
+      if (WrapperKind != ISD::DELETED_NODE)
+        Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+      // Add extra indirection if needed.
+      if (ExtraLoad)
+        Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+                             MachinePointerInfo::getGOT(),
+                             false, false, 0);
     }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     unsigned char OpFlags = 0;
@@ -2363,7 +2384,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // Create the CALLSEQ_END node.
   unsigned NumBytesForCalleeToPush;
-  if (Subtarget->IsCalleePop(isVarArg, CallConv))
+  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
     NumBytesForCalleeToPush = NumBytes;  // Callee pops everything
   else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
     // If this is a call to a struct-return function, the callee
@@ -2485,6 +2506,10 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
     if (!FINode)
       return false;
     FI = FINode->getIndex();
+  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+    FI = FINode->getIndex();
+    Bytes = Flags.getByValSize();
   } else
     return false;
 
@@ -2536,6 +2561,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   if (isCalleeStructRet || isCallerStructRet)
     return false;
 
+  // An stdcall caller is expected to clean up its arguments; the callee
+  // isn't going to do that.
+  if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+    return false;
+
   // Do not sibcall optimize vararg calls unless all arguments are passed via
   // registers.
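// The two call sequences at stake in the NonLazyBind case above, in AT&T
// syntax (illustrative only; "foo" is a placeholder symbol):
//
//   callq   foo                      # direct call; lazily bound via the PLT
//   callq   *foo@GOTPCREL(%rip)      # indirect through the GOT entry: one
//                                    # extra byte of encoding, eager binding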
  if (isVarArg && !Outs.empty()) {
@@ -2672,11 +2702,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     }
   }
 
-  // An stdcall caller is expected to clean up its arguments; the callee
-  // isn't going to do that.
-  if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
-    return false;
-
   return true;
 }
 
@@ -2856,6 +2881,29 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
   return false;
 }
 
+/// isCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+                      bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+  if (IsVarArg)
+    return false;
+
+  switch (CallingConv) {
+  default:
+    return false;
+  case CallingConv::X86_StdCall:
+    return !is64Bit;
+  case CallingConv::X86_FastCall:
+    return !is64Bit;
+  case CallingConv::X86_ThisCall:
+    return !is64Bit;
+  case CallingConv::Fast:
+    return TailCallOpt;
+  case CallingConv::GHC:
+    return TailCallOpt;
+  }
+}
+
/// TranslateX86CC - do a one-to-one translation of an ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
@@ -3790,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
 }
 
 /// getOnesVector - Returns a vector of specified type with all bits set.
-///
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
+  assert((VT.is128BitVector() || VT.is256BitVector())
+         && "Expected a 128-bit or 256-bit vector type");
 
-  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
-  // type. This ensures they get CSE'd.
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
   SDValue Vec;
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  if (VT.is256BitVector()) {
+    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+  } else
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+
   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
-
 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
 /// that point to V2 point to its first element.
 static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
@@ -4417,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return ConcatVectors(Lower, Upper, DAG);
   }
 
-  // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
-  // All one's are handled with pcmpeqd. In AVX, zero's are handled with
-  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
-  // is present, so AllOnes is ignored.
+  // All zero's:
+  // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+  // All one's:
+  // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
-      (Op.getValueType().getSizeInBits() != 256 &&
-       ISD::isBuildVectorAllOnes(Op.getNode()))) {
+      ISD::isBuildVectorAllOnes(Op.getNode())) {
-    // Canonicalize this to <4 x i32> (SSE) to
+    // Canonicalize this to <4 x i32> or <8 x i32> (SSE) to
     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
     // eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32) + if (Op.getValueType() == MVT::v4i32 || + Op.getValueType() == MVT::v8i32) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) @@ -8874,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Lower SHL with variable shift amount. - // Cannot lower SHL without SSE4.1 or later. - if (!Subtarget->hasSSE41()) return SDValue(); + // Cannot lower SHL without SSE2 or later. + if (!Subtarget->hasSSE2()) return SDValue(); if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, @@ -9022,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return Sum; } +SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ + DebugLoc dl = Op.getDebugLoc(); + SDNode* Node = Op.getNode(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + + if (Subtarget->hasSSE2() && VT.isVector()) { + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); + + unsigned SHLIntrinsicsID = 0; + unsigned SRAIntrinsicsID = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i64: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q; + SRAIntrinsicsID = 0; + break; + } + case MVT::v4i32: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; + SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; + break; + } + case MVT::v8i16: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w; + SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w; + break; + } + } + + SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SHLIntrinsicsID, MVT::i32), + Node->getOperand(0), ShAmt); + + // In case of 1 bit sext, no need to shr + if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1; + + if (SRAIntrinsicsID) { + Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SRAIntrinsicsID, MVT::i32), + Tmp1, ShAmt); + } + return Tmp1; + } + + return SDValue(); +} + + SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasSSE2()) { + // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. + // There isn't any reason to disable it if the target processor supports it. + if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, - Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32); + SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base DAG.getTargetConstant(1, MVT::i8), // Scale @@ -9183,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -9281,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::SIGN_EXTEND_INREG: case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: @@ -9415,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; - case X86ISD::PANDN: return "X86ISD::PANDN"; + case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; @@ -11766,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Want to form PANDN nodes, in the hopes of then easily combining them with - // OR and AND nodes to form PBLEND/PSIGN. + // Want to form ANDNP nodes: + // 1) In the hopes of then easily combining them with OR and AND nodes + // to form PBLEND/PSIGN. + // 2) To match ANDN packed intrinsics EVT VT = N->getValueType(0); - if (VT != MVT::v2i64) + if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); SDValue N0 = N->getOperand(0); @@ -11779,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // Check LHS for vnot if (N0.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) - return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); // Check RHS for vnot if (N1.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) - return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0); + return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); return SDValue(); } @@ -11810,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (Subtarget->hasSSSE3()) { if (VT == MVT::v2i64) { // Canonicalize pandn to RHS - if (N0.getOpcode() == X86ISD::PANDN) + if (N0.getOpcode() == X86ISD::ANDNP) std::swap(N0, N1); // or (and (m, x), (pandn m, y)) - if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) { + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { SDValue Mask = N1.getOperand(0); SDValue X = N1.getOperand(1); SDValue Y; @@ -11822,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (N0.getOperand(1) == Mask) Y = N0.getOperand(0); - // Check to see if the mask appeared in both the AND and PANDN and + // Check to see if the mask appeared in both the AND and ANDNP and if (!Y.getNode()) return SDValue(); @@ -12166,8 +12276,8 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, 
const X86TargetLowering *XTLI) { - DebugLoc dl = N->getDebugLoc(); +static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, + const X86TargetLowering *XTLI) { SDValue Op0 = N->getOperand(0); // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have // a 32-bit target where SSE doesn't support i64->FP operations. @@ -12178,7 +12288,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86T ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !XTLI->getSubtarget()->is64Bit() && !DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG); + SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), + Ld->getChain(), Op0, DAG); DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1)); return FILDChain; } @@ -12549,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { case 'y': case 'x': case 'Y': + case 'l': return C_RegisterClass; case 'a': case 'b': @@ -12832,60 +12944,6 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -std::vector<unsigned> X86TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() == 1) { - // FIXME: not handling fp-stack yet! - switch (Constraint[0]) { // GCC X86 Constraint Letters - default: break; // Unknown constraint letter - case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. - if (Subtarget->is64Bit()) { - if (VT == MVT::i32) - return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, - X86::ESI, X86::EDI, X86::R8D, X86::R9D, - X86::R10D,X86::R11D,X86::R12D, - X86::R13D,X86::R14D,X86::R15D, - X86::EBP, X86::ESP, 0); - else if (VT == MVT::i16) - return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, - X86::SI, X86::DI, X86::R8W,X86::R9W, - X86::R10W,X86::R11W,X86::R12W, - X86::R13W,X86::R14W,X86::R15W, - X86::BP, X86::SP, 0); - else if (VT == MVT::i8) - return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, - X86::SIL, X86::DIL, X86::R8B,X86::R9B, - X86::R10B,X86::R11B,X86::R12B, - X86::R13B,X86::R14B,X86::R15B, - X86::BPL, X86::SPL, 0); - - else if (VT == MVT::i64) - return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, - X86::RSI, X86::RDI, X86::R8, X86::R9, - X86::R10, X86::R11, X86::R12, - X86::R13, X86::R14, X86::R15, - X86::RBP, X86::RSP, 0); - - break; - } - // 32-bit fallthrough - case 'Q': // Q_REGS - if (VT == MVT::i32) - return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); - else if (VT == MVT::i16) - return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); - else if (VT == MVT::i8) - return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); - else if (VT == MVT::i64) - return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0); - break; - } - } - - return std::vector<unsigned>(); -} - std::pair<unsigned, const TargetRegisterClass*> X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -12895,9 +12953,35 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC Constraint Letters switch (Constraint[0]) { default: break; + // TODO: Slight differences here in allocation order and leaving + // RIP in the class. Do they matter any more here than they do + // in the normal allocation? 
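// An example of the "q"/"Q" constraints handled below, in GCC-style inline
// asm ("Q" restricts the operand to a/b/c/d, so the high-byte register %h1
// (AH/BH/CH/DH) is addressable; a plain "r" operand would not guarantee that):
//
//   int high_byte(int x) {
//     int r;
//     asm("movzbl %h1, %0" : "=r"(r) : "Q"(x));
//     return r;  // bits 15:8 of x, zero-extended
//   }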
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. + if (Subtarget->is64Bit()) { + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, X86::GR32RegisterClass); + else if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16RegisterClass); + else if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, X86::GR8RegisterClass); + else if (VT == MVT::i64 || VT == MVT::f64) + return std::make_pair(0U, X86::GR64RegisterClass); + break; + } + // 32-bit fallthrough + case 'Q': // Q_REGS + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, X86::GR32_ABCDRegisterClass); + else if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16_ABCDRegisterClass); + else if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass); + else if (VT == MVT::i64) + return std::make_pair(0U, X86::GR64_ABCDRegisterClass); + break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS - if (VT == MVT::i8) + if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8RegisterClass); if (VT == MVT::i16) return std::make_pair(0U, X86::GR16RegisterClass); @@ -12905,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, X86::GR32RegisterClass); return std::make_pair(0U, X86::GR64RegisterClass); case 'R': // LEGACY_REGS - if (VT == MVT::i8) + if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8_NOREXRegisterClass); if (VT == MVT::i16) return std::make_pair(0U, X86::GR16_NOREXRegisterClass); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d61a1252304a..b6036782b865 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -169,8 +169,8 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - /// PANDN - and with not'd value. - PANDN, + /// ANDNP - Bitwise Logical AND NOT of Packed FP values. + ANDNP, /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, @@ -466,6 +466,12 @@ namespace llvm { /// fit into displacement field of the instruction. bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement = true); + + + /// isCalleePop - Determines whether the callee is required to pop its + /// own arguments. Callee pop is necessary to support tail calls. 
+ bool isCalleePop(CallingConv::ID CallingConv, + bool is64Bit, bool IsVarArg, bool TailCallOpt); } //===--------------------------------------------------------------------===// @@ -590,10 +596,6 @@ namespace llvm { virtual ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual const char *LowerXConstraint(EVT ConstraintVT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops @@ -823,6 +825,7 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 1ea8071053e9..0245e5c09644 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -150,11 +150,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); unsigned Flags = 0; - if (TID.mayLoad()) + if (MCID.mayLoad()) Flags |= MachineMemOperand::MOLoad; - if (TID.mayStore()) + if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 33534cd82bdb..adcc747eb4b8 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1368,6 +1368,11 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), // (shl x, 1) ==> (add x, x) +// Note that if x is undef (immediate or otherwise), we could theoretically +// end up with the two uses of x getting different values, producing a result +// where the least significant bit is not 0. However, the probability of this +// happening is considered low enough that this is officially not a +// "real problem". def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index b506f5e0b81a..7cb870fabd62 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -112,31 +112,8 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // a pattern) and the FPI instruction should have emission info (e.g. opcode // encoding and asm printing info). -// Pseudo Instructions for FP stack return values. -def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0) -def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0) -def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0) - -// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when -// there are two values live out on the stack from a call or inlineasm. This -// magic is handled by the stackifier. 
It is not valid to emit FpGET_ST1* and -// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to -// occur between them. -def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1) -def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1) -def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1) - -let Defs = [ST0] in { -def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR -def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR -def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR -} - -let Defs = [ST1] in { -def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR -def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR -def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR -} +// Pseudo Instruction for FP stack return values. +def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FpIf32, FpIf64 - Floating Point Pseudo Instruction template. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. @@ -147,19 +124,6 @@ class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> : class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> : FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>; -// Register copies. Just copies, the shortening ones do not truncate. -let neverHasSideEffects = 1 in { - def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; - def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; - def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; -} - // Factoring for arithmetic. 
multiclass FPBinary_rr<SDNode OpNode> { // Register op register -> register diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 7daa26492274..6d89bcc29e7b 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -460,6 +460,11 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, Requires<[HasCLMUL]>; + +class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; // FMA3 Instruction Templates diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7c9a9f7e8c50..b00109c9fa4d 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -46,8 +46,8 @@ def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pandn : SDNode<"X86ISD::PANDN", - SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, +def X86andnp : SDNode<"X86ISD::ANDNP", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, @@ -168,11 +168,13 @@ def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def sdmem : Operand<v2f64> { let PrintMethod = "printf64mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } //===----------------------------------------------------------------------===// @@ -301,6 +303,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; // 256-bit bitconvert pattern fragments def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; +def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e2016eb2d6fb..55b5835f52a7 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -13,7 +13,6 @@ #include "X86InstrInfo.h" #include "X86.h" -#include "X86GenInstrInfo.inc" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -36,6 +35,9 @@ #include "llvm/MC/MCAsmInfo.h" #include <limits> +#define GET_INSTRINFO_CTOR +#include "X86GenInstrInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -52,7 +54,12 @@ ReMatPICStubLoad("remat-pic-stub-load", cl::init(false), cl::Hidden); X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) - : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), + : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::ADJCALLSTACKDOWN64 + : X86::ADJCALLSTACKDOWN32), + (tm.getSubtarget<X86Subtarget>().is64Bit() + ? 
X86::ADJCALLSTACKUP64 + : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { enum { TB_NOT_REVERSABLE = 1U << 31, @@ -293,12 +300,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, { X86::MUL32r, X86::MUL32m, 1, 0 }, { X86::MUL64r, X86::MUL64m, 1, 0 }, @@ -403,10 +415,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOV8rr, X86::MOV8rm, 0 }, { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 }, { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, @@ -417,6 +432,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, @@ -779,6 +796,9 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOVAPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: return true; @@ -800,6 +820,9 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOVAPSmr: case X86::MOVAPDmr: case X86::MOVDQAmr: + case X86::VMOVAPSYmr: + case X86::VMOVAPDYmr: + case X86::VMOVDQAYmr: case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: @@ -918,6 +941,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVUPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: case X86::FsMOVAPSrm: @@ -1689,13 +1716,13 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. 
- if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -1789,7 +1816,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, .addMBB(UnCondBrIter->getOperand(0).getMBB()); BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) .addMBB(TargetBB); - MBB.addSuccessor(TargetBB); OldInst->eraseFromParent(); UnCondBrIter->eraseFromParent(); @@ -1968,6 +1994,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = X86::MOV8rr; } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = X86::MOVAPSrr; + else if (X86::VR256RegClass.contains(DestReg, SrcReg)) + Opc = X86::VMOVAPSYrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else @@ -2057,6 +2085,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::MOVAPSrm : X86::MOVAPSmr; else return load ? X86::MOVUPSrm : X86::MOVUPSmr; + case 32: + assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); + // If stack is realigned we can use aligned stores. + if (isStackAligned) + return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; + else + return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr; } } @@ -2083,7 +2118,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); - bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) @@ -2115,7 +2151,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); @@ -2224,7 +2261,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // FIXME: AsmPrinter doesn't know how to handle // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. @@ -2273,7 +2310,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); + unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize(); if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. 
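A note on the check that closes the hunk above: a load from a stack slot may only be folded into an instruction when the slot is at least as wide as the register class the instruction expects, otherwise the folded load would read past the end of the object. A minimal sketch of the guard, with hypothetical names rather than LLVM's API:

// Folding is only safe when the backing object covers the full load width.
bool canFoldStackSlot(unsigned SlotSize, unsigned RCSize) {
  // e.g. folding a 4-byte spill slot into a 16-byte VR128 load would pull
  // in 12 bytes of garbage past the object, so it must be rejected
  return SlotSize >= RCSize;
}

The NarrowToMOV32rm flag above covers the one sanctioned exception on x86-64: a 64-bit reload of a 4-byte slot can be rewritten as a 32-bit load, because 32-bit moves implicitly zero the upper half of the destination register.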
@@ -2542,7 +2579,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, unsigned Opc = MI->getOpcode(); unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires @@ -2588,9 +2625,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, return false; UnfoldStore &= FoldedStore; - const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.getRegClass(&RI); + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) @@ -2632,7 +2668,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, } // Emit the data processing instruction. - MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); + MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); MachineInstrBuilder MIB(DataMI); if (FoldedStore) @@ -2685,7 +2721,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); + const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(MI->memoperands_begin(), @@ -2710,9 +2746,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned Index = I->second.second & 0xf; bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); - const TargetInstrDesc &TID = get(Opc); - const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); - unsigned NumDefs = TID.NumDefs; + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); + unsigned NumDefs = MCID.NumDefs; std::vector<SDValue> AddrOps; std::vector<SDValue> BeforeOps; std::vector<SDValue> AfterOps; @@ -2756,13 +2792,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the data processing instruction. 
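The lookup just above unpacks one table value into three facts: the low nibble is the operand index the memory reference was folded into, and bits 4 and 5 record whether a load and a store were folded. An illustrative decoder in plain C++ (not LLVM's data structures); the instruction emission then continues below:

struct UnfoldBits {
  unsigned OpIndex;  // bits 3:0 - operand slot the memory operand occupied
  bool FoldedLoad;   // bit 4   - a load was folded and can be split back out
  bool FoldedStore;  // bit 5   - a store was folded and can be split back out
};

UnfoldBits decodeUnfoldBits(unsigned Packed) {
  UnfoldBits B;
  B.OpIndex     = Packed & 0xf;
  B.FoldedLoad  = (Packed & (1u << 4)) != 0;
  B.FoldedStore = (Packed & (1u << 5)) != 0;
  return B;
}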
std::vector<EVT> VTs; const TargetRegisterClass *DstRC = 0; - if (TID.getNumDefs() > 0) { - DstRC = TID.OpInfo[0].getRegClass(&RI); + if (MCID.getNumDefs() > 0) { + DstRC = getRegClass(MCID, 0, &RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) + if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) VTs.push_back(VT); } if (Load) @@ -2845,6 +2881,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::VMOVDQUYrm: break; } switch (Opc2) { @@ -2867,6 +2908,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::VMOVDQUYrm: break; } @@ -3045,6 +3091,13 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, + // AVX 256-bit support + { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, + { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, + { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, + { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, + { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, + { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index d8950230d83d..5f2eba34ac45 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -19,6 +19,9 @@ #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" +#define GET_INSTRINFO_HEADER +#include "X86GenInstrInfo.inc" + namespace llvm { class X86RegisterInfo; class X86TargetMachine; @@ -611,7 +614,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) { isLeaMem(MI, Op); } -class X86InstrInfo : public TargetInstrInfoImpl { +class X86InstrInfo : public X86GenInstrInfo { X86TargetMachine &TM; const X86RegisterInfo RI; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8cab80824688..7eb07b0a97bd 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -251,6 +251,7 @@ class X86MemOperand<string printMethod> : Operand<iPTR> { let ParserMatchClass = X86MemAsmOperand; } +let OperandType = "OPERAND_MEMORY" in { def opaque32mem : X86MemOperand<"printopaquemem">; def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; @@ -267,6 +268,7 @@ def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; def f256mem : X86MemOperand<"printf256mem">; +} // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. @@ -274,6 +276,7 @@ def i8mem_NOREX : Operand<i64> { let PrintMethod = "printi8mem"; let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } // GPRs available for tailcall. 
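The i8mem_NOREX operand defined above exists because of an encoding corner of x86-64: the legacy high-byte registers AH/CH/DH/BH have no encoding once a REX prefix is present, while SPL/BPL/SIL/DIL and R8B-R15B are encodable only with one. Restricting the address to GR64_NOREX keeps the base and index registers from forcing a REX prefix onto an instruction that may also name a high-byte register. A sketch of the rule (illustrative enum, not LLVM's register numbering):

enum ByteReg { AL, CL, DL, BL,             // always encodable
               AH, CH, DH, BH,             // legacy high-byte registers
               SPL, BPL, SIL, DIL, R8B };  // R8B standing in for R8B-R15B

bool requiresREX(ByteReg R) { return R >= SPL; }           // REX mandatory
bool forbidsREX(ByteReg R)  { return R >= AH && R <= BH; } // REX impossible

Since a single instruction either carries a REX prefix or does not, an AH-family register and a REX-demanding base register can never meet in the same instruction; i8mem_NOREX removes the second half of that conflict by construction.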
@@ -287,6 +290,7 @@ def i32mem_TC : Operand<i32> { let PrintMethod = "printi32mem"; let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } // Special i64mem for addresses of load folding tail calls. These are not @@ -297,9 +301,11 @@ def i64mem_TC : Operand<i64> { let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } -let ParserMatchClass = X86AbsMemAsmOperand, +let OperandType = "OPERAND_PCREL", + ParserMatchClass = X86AbsMemAsmOperand, PrintMethod = "print_pcrel_imm" in { def i32imm_pcrel : Operand<i32>; def i16imm_pcrel : Operand<i16>; @@ -317,6 +323,7 @@ def brtarget8 : Operand<OtherVT>; def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; + let OperandType = "OPERAND_IMMEDIATE"; } class ImmSExtAsmOperandClass : AsmOperandClass { @@ -363,15 +370,18 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { // 16-bits but only 8 bits are significant. def i16i8imm : Operand<i16> { let ParserMatchClass = ImmSExti16i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 32-bits but only 8 bits are significant. def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64> { let ParserMatchClass = ImmSExti64i32AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant, and those bits are treated as being @@ -438,8 +448,10 @@ def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; -def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate; -def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate; +def In32BitMode : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<"!Mode64Bit">; +def In64BitMode : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -669,7 +681,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; } let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", []>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{q}\t$imm", []>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b64c03a9b597..fe11d776804c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -512,6 +512,26 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W; +let Predicates = [HasAVX] in { + def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), 
addr:$src)>; + + def : Pat<(f32 (sint_to_fp GR32:$src)), + (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; + def : Pat<(f32 (sint_to_fp GR64:$src)), + (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; + def : Pat<(f64 (sint_to_fp GR32:$src)), + (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; + def : Pat<(f64 (sint_to_fp GR64:$src)), + (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; +} + defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, @@ -1473,83 +1493,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, - SDNode OpNode, int HasPat = 0, - list<list<dag>> Pattern = []> { + SDNode OpNode> { let Pattern = []<dag> in { defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - VEX_4V; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - OpSize, VEX_4V; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, TB; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB; defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, - TB, OpSize; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB, OpSize; } } /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// -multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { +multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, + SDNode OpNode> { defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V; + !strconcat(OpcodeStr, "ps"), f256mem, + 
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, VEX_4V; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V; + !strconcat(OpcodeStr, "pd"), f256mem, + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and">; -defm VOR : sse12_fp_packed_logical_y<0x56, "or">; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">; -let isCommutable = 0 in - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">; +defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; +defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; +defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ - // single r+r - [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))], - // double r+r - [], - // single r+m - [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))], - // double r+m - []]>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -1991,11 +1996,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", + "movnti{l}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, TB, Requires<[HasSSE2]>; def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "movnti\t{$src, $dst|$dst, $src}", + "movnti{q}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, TB, Requires<[HasSSE2]>; } @@ -2006,13 +2011,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), // Prefetch intrinsic. def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>; def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>; def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>; def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, @@ -2037,7 +2042,10 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", } // The same as done above but for AVX. The 128-bit versions are the -// same, but re-encoded. The 256-bit does not support PI version. 
+// same, but re-encoded. The 256-bit does not support PI version, and +// doesn't need it because on sandy bridge the register is set to zero +// at the rename stage without using any execution unit, so SET0PSY +// and SET0PDY can be used for vector int instructions without penalty // FIXME: Change encoding to pseudo! This is blocked right now by the x86 // JIT implementatioan, it does not expand the instructions below like // X86MCInstLower does. @@ -2052,8 +2060,8 @@ def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; let ExeDomain = SSEPackedInt in -def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; +def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; } def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; @@ -2063,6 +2071,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar as the "$src1 = $dst" +// constraint but without the tied operands. +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>, + Requires<[HasAVX, OptForSpeed]>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Load/Store XCSR register //===----------------------------------------------------------------------===// @@ -2959,6 +2976,22 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; } +// These are the correct encodings of the instructions so that we know how to +// read correct assembly, even though we continue to emit the wrong ones for +// compatibility with Darwin's buggy assembler. +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOV64toSDrr FR64:$dst, GR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOVSDto64rr GR64:$dst, FR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; + //===---------------------------------------------------------------------===// // SSE2 - Move Quadword //===---------------------------------------------------------------------===// @@ -3589,6 +3622,16 @@ let Predicates = [HasSSE2] in def : Pat<(fextend (loadf32 addr:$src)), (CVTSS2SDrm addr:$src)>; +// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar as the "$src1 = $dst" +// constraint but without the tied operands. 
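The FIXME closing the hunk above, and the AVX pattern right below it, concern a real microarchitectural wrinkle: scalar cvtss2sd writes only the low 64 bits of its destination and merges the rest from the first source, so the result carries a dependence on whatever last wrote that register. The SSE2 intrinsic form makes the normally hidden input explicit, which is a convenient way to see the semantics (standard <emmintrin.h>; background illustration, not code from the patch):

#include <emmintrin.h>

// Lower double <- (double)scalars[0]; the upper 64 bits are copied from
// 'upper'. Whatever produced 'upper' becomes an input dependence of the
// result, which is why the pattern below feeds in a freshly zeroed register.
__m128d widen_low_float(__m128d upper, __m128 scalars) {
  return _mm_cvtss_sd(upper, scalars);
}

Feeding AVX_SET0PS as src1 breaks that dependence at the price of one extra instruction, and since zero idioms are recognized at rename (per the comment above), the xor itself consumes no execution slot.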
+let Predicates = [HasAVX] in + def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), + addr:$src)>; + // bit_convert let Predicates = [HasXMMInt] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; @@ -3625,6 +3668,19 @@ let Predicates = [HasXMMInt] in { let Predicates = [HasAVX] in { def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; } // Move scalar to XMM zero-extended @@ -3807,6 +3863,8 @@ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. let Predicates = [HasSSE1] in { def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>; @@ -3835,8 +3893,9 @@ let Predicates = [HasSSE1] in { (MOVUPSmr addr:$dst, VR128:$src)>; } -// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter). +// Use vmovaps/vmovups for AVX integer load/store. let Predicates = [HasAVX] in { + // 128-bit load/store def : Pat<(alignedloadv4i32 addr:$src), (VMOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), @@ -3862,6 +3921,24 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (VMOVUPSmr addr:$dst, VR128:$src)>; + + // 256-bit load/store + def : Pat<(alignedloadv4i64 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv4i64 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedloadv8i32 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv8i32 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v4i64 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v8i32 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; } //===----------------------------------------------------------------------===// @@ -5160,33 +5237,52 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // CLMUL Instructions //===----------------------------------------------------------------------===// -// Only the AVX version of CLMUL instructions are described here. 
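The CLMUL rework below maps each pclmul<s1><s2>dq spelling onto the single PCLMULQDQ/VPCLMULQDQ opcode plus a selector immediate: bit 0 of imm8 picks the quadword of the first source and bit 4 picks the quadword of the second, which is where the 0x00/0x01/0x10/0x11 values in the aliases come from. A tiny sketch of that mapping (hypothetical helper, not part of the patch):

// "lq" = low quadword (bits 63:0), "hq" = high quadword (bits 127:64).
unsigned pclmulSelector(bool Src1High, bool Src2High) {
  return (Src1High ? 0x01u : 0x00u) | (Src2High ? 0x10u : 0x00u);
}
// pclmulSelector(true, true) == 0x11, i.e. "pclmulhqhqdq": multiply the
// high quadwords of both sources.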
- // Carry-less Multiplication instructions -def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +let Constraints = "$src1 = $dst" in { +def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>; + +def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>; +} + +// AVX carry-less Multiplication instructions +def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -// Assembler Only -multiclass avx_vpclmul<string asm> { - def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; - - def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; -} -defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">; -defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; -defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; -defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; + +multiclass pclmul_alias<string asm, int immop> { + def : InstAlias<!strconcat("pclmul", asm, + "dq {$src, $dst|$dst, $src}"), + (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>; + + def : InstAlias<!strconcat("pclmul", asm, + "dq {$src, $dst|$dst, $src}"), + (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>; + + def : InstAlias<!strconcat("vpclmul", asm, + "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), + (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>; + + def : InstAlias<!strconcat("vpclmul", asm, + "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), + (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>; +} +defm : pclmul_alias<"hqhq", 0x11>; +defm : pclmul_alias<"hqlq", 0x01>; +defm : pclmul_alias<"lqhq", 0x10>; +defm : pclmul_alias<"lqlq", 0x00>; //===----------------------------------------------------------------------===// // AVX Instructions diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index f73cff39e86d..31de878343ef 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -411,6 +411,8 @@ let Uses = [RDX, RAX, RCX] in let Defs = [RAX, RDI], Uses = [RDX, RDI] in def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7; +def : InstAlias<"xstorerng", (XSTORE)>; + let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7; def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 55aceba9f270..ce8ef495c001 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -18,26 +18,32 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { class X86MCCodeEmitter : public MCCodeEmitter { 
X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; MCContext &Ctx; - bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { - Is64BitMode = is64Bit; + X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti), Ctx(ctx) { } ~X86MCCodeEmitter() {} + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) != 0; + } + static unsigned GetX86RegNum(const MCOperand &MO) { return X86RegisterInfo::getX86RegNum(MO.getReg()); } @@ -111,7 +117,7 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const TargetInstrDesc &Desc, + const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte, @@ -119,23 +125,17 @@ public: raw_ostream &OS) const; void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const TargetInstrDesc &Desc, + const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; }; } // end anonymous namespace -MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM, - MCContext &Ctx) { - return new X86MCCodeEmitter(TM, Ctx, false); -} - -MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM, - MCContext &Ctx) { - return new X86MCCodeEmitter(TM, Ctx, true); +MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new X86MCCodeEmitter(MCII, STI, Ctx); } /// isDisp8 - Return true if this signed displacement fits in a 8-bit @@ -245,7 +245,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Handle %rip relative addressing. if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode - assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode"); + assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode"); assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); @@ -284,7 +284,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). - (!Is64BitMode || BaseReg != 0)) { + (!is64BitMode() || BaseReg != 0)) { if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); @@ -379,7 +379,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// called VEX. void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, - const TargetInstrDesc &Desc, + const MCInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) @@ -586,7 +586,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand /// size, and 3) use of X86-64 extended registers. 
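For orientation before the function below: the REX prefix whose bits DetermineREXPrefix assembles is a single byte of the form 0100WRXB. W requests 64-bit operand size, and R, X, B extend the ModRM.reg, SIB.index, and ModRM.rm/base fields so that r8-r15 and xmm8-xmm15 become reachable. The layout as a plain encoding fact (not LLVM code):

// REX = 0100 W R X B
unsigned makeREX(bool W, bool R, bool X, bool B) {
  return 0x40u | ((unsigned)W << 3)  // 64-bit operand size
               | ((unsigned)R << 2)  // extends ModRM.reg
               | ((unsigned)X << 1)  // extends SIB.index
               |  (unsigned)B;       // extends ModRM.rm / SIB.base
}

This is why the code sets REX |= 1 << 3 for REX_W below, and why a bare 0x40 with all four bits clear is still meaningful: its mere presence switches the AH/CH/DH/BH byte-register encodings over to SPL/BPL/SIL/DIL.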
static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, - const TargetInstrDesc &Desc) { + const MCInstrDesc &Desc) { unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; // set REX.W @@ -596,7 +596,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, unsigned NumOps = MI.getNumOperands(); // FIXME: MCInst should explicitize the two-addrness. bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; @@ -713,7 +713,7 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, /// Not present, it is -1. void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, - const TargetInstrDesc &Desc, + const MCInstrDesc &Desc, raw_ostream &OS) const { // Emit the lock opcode prefix as needed. @@ -729,7 +729,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Emit the address size opcode prefix as needed. if ((TSFlags & X86II::AdSize) || - (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand))) + (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) EmitByte(0x67, CurByte, OS); // Emit the operand size opcode prefix as needed. @@ -772,7 +772,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Handle REX prefix. // FIXME: Can this come before F2 etc to simplify emission? - if (Is64BitMode) { + if (is64BitMode()) { if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) EmitByte(0x40 | REX, CurByte, OS); } @@ -803,7 +803,7 @@ void X86MCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); + const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; // Pseudo instructions don't get encoded. @@ -814,9 +814,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) ++CurOp; - else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. 
LXADD32 --NumOps; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 793156ffce83..e38533555534 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -16,8 +16,8 @@ #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" -#include "X86MCAsmInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp index 8f3dd3222489..37110382379e 100644 --- a/lib/Target/X86/X86MachObjectWriter.cpp +++ b/lib/Target/X86/X86MachObjectWriter.cpp @@ -8,19 +8,541 @@ //===----------------------------------------------------------------------===// #include "X86.h" +#include "X86FixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Object/MachOFormat.h" + using namespace llvm; +using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { + void RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue); + void RecordTLVPRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void RecordX86Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + void RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); public: X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/Is64Bit) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + if (Writer->is64Bit()) + RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + else + RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + } }; } +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: return 0; + case FK_PCRel_2: + case FK_Data_2: return 1; + case FK_PCRel_4: + // FIXME: Remove these!!! 
+ case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_signed_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // See <reloc.h>. + uint32_t FixupOffset = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + uint32_t FixupAddress = + Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only have + // the addend and appear to have attempted to define it to be the actual + // expression addend without the PCrel bias. However, instructions with data + // following the relocation are not accommodated for (see comment below + // regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1LL << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = macho::RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can understand + // it. I think it would require a local relocation, but I'm not sure if that + // would work either. The official way to get an absolute PCrel relocation + // is to use an absolute symbol (which we don't support yet). + if (IsPCRel) { + IsExtern = 1; + Type = macho::RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + report_fatal_error("unsupported pc-relative relocation of difference"); + + // The support for the situation where one or both of the symbols would + // require a local relocation is handled just like if the symbols were + // external. This is certainly used in the case of debug sections where the + // section has only temporary symbols and thus the symbols don't have base + // symbols. This is encoded using the section ordinal and non-extern + // relocation entries. + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with a + // single SIGNED relocation); reject it for now. Except the case where both + // symbols don't have a base, equal but both NULL. + if (A_Base == B_Base && A_Base) + report_fatal_error("unsupported relocation with identical base"); + + Value += Writer->getSymbolAddress(&A_SD, Layout) - + (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout)); + Value -= Writer->getSymbolAddress(&B_SD, Layout) - + (B_Base == NULL ? 
0 : Writer->getSymbolAddress(B_Base, Layout)); + + if (A_Base) { + Index = A_Base->getIndex(); + IsExtern = 1; + } + else { + Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Unsigned; + + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + + if (B_Base) { + Index = B_Base->getIndex(); + IsExtern = 1; + } + else { + Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand x86_64 relocation entries, and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( + Fragment->getParent()->getSection()); + if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) + Base = 0; + } + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); + } else if (Symbol->isInSection() && !Symbol->isVariable()) { + // The index is the section ordinal (1-based). + Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + Value += Writer->getSymbolAddress(&SD, Layout); + + if (IsPCRel) + Value -= FixupAddress + (1 << Log2Size); + } else if (Symbol->isVariable()) { + const MCExpr *Value = Symbol->getVariableValue(); + int64_t Res; + bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, + Writer->getSectionAddressMap()); + if (isAbs) { + FixedValue = Res; + return; + } else { + report_fatal_error("unsupported relocation of variable '" + + Symbol->getName() + "'"); + } + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + Type = macho::RIT_X86_64_GOTLoad; + else + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = macho::RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + report_fatal_error("unsupported symbol modifier in relocation"); + } else { + Type = macho::RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). 
Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and the + // linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the final + // offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = macho::RIT_X86_64_Signed1; break; + case 2: Type = macho::RIT_X86_64_Signed2; break; + case 4: Type = macho::RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in branch " + "relocation"); + + Type = macho::RIT_X86_64_Branch; + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) { + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which + // case all we do is set the PCrel bit in the relocation entry; this is + // used with exception handling, for example. The source is required to + // include any necessary offset directly. + Type = macho::RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); + } else if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in relocation"); + else + Type = macho::RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely for + // pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? 
(unsigned)macho::RIT_Difference : + (unsigned)macho::RIT_Generic_LocalDifference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !is64Bit() && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend is + // zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) + + Target.getConstant()); + FixedValue += 1ULL << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (macho::RIT_Generic_TLV << 28)); // Type + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA() && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // Get the symbol data, if any. 
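The scattered entries assembled above pack everything into two 32-bit words: Word0 carries the fixup offset together with the type, size, and PC-relative flags plus the scattered marker in the top bit, while Word1 carries the target address. Mirrored as a standalone function for readability (the 0x80000000 constant is Mach-O's R_SCATTERED bit; illustrative only, not LLVM's API):

#include <cstdint>

uint32_t scatteredWord0(uint32_t FixupOffset, unsigned Type,
                        unsigned Log2Size, bool IsPCRel) {
  return (FixupOffset << 0)         |
         ((uint32_t)Type     << 24) |
         ((uint32_t)Log2Size << 28) |
         ((uint32_t)IsPCRel  << 30) |
         0x80000000u; // RF_Scattered: the high bit selects this format
}

The non-scattered entries elsewhere in this file use the other layout: a plain fixup offset in Word0, and Word1 holding the symbol or section index in its low 24 bits followed by the pcrel/size/extern/type fields. The per-symbol handling resumes below.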
+ MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a scattered + // relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = macho::RIT_Vanilla; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + + Type = macho::RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, bool Is64Bit, uint32_t CPUType, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1ad6203af2f6..f2faf59367a1 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -39,6 +39,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CommandLine.h" + +#define GET_REGINFO_TARGET_DESC +#include "X86GenRegisterInfo.inc" + using namespace llvm; cl::opt<bool> @@ -49,18 +53,11 @@ ForceStackAlign("force-align-stack", X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ? - X86::ADJCALLSTACKDOWN64 : - X86::ADJCALLSTACKDOWN32, - tm.getSubtarget<X86Subtarget>().is64Bit() ? - X86::ADJCALLSTACKUP64 : - X86::ADJCALLSTACKUP32), - TM(tm), TII(tii) { + : X86GenRegisterInfo(), TM(tm), TII(tii) { // Cache some information. 
MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
                                                bool Is64Bit,
                                                uint32_t CPUType,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1ad6203af2f6..f2faf59367a1 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -39,6 +39,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
using namespace llvm;

cl::opt<bool>
@@ -49,18 +53,11 @@ ForceStackAlign("force-align-stack",

X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                 const TargetInstrInfo &tii)
-  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
-                         X86::ADJCALLSTACKDOWN64 :
-                         X86::ADJCALLSTACKDOWN32,
-                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
-                         X86::ADJCALLSTACKUP64 :
-                         X86::ADJCALLSTACKUP32),
-    TM(tm), TII(tii) {
+  : X86GenRegisterInfo(), TM(tm), TII(tii) {
  // Cache some information.
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  Is64Bit = Subtarget->is64Bit();
  IsWin64 = Subtarget->isTargetWin64();
-  StackAlign = TM.getFrameLowering()->getStackAlignment();

  if (Is64Bit) {
    SlotSize = 8;
@@ -107,6 +104,21 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
  return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
}

+/// getCompactUnwindRegNum - This function maps the register to the number for
+/// compact unwind encoding. Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+  switch (getLLVMRegNum(RegNum, isEH)) {
+  case X86::EBX: case X86::RBX: return 1;
+  case X86::ECX: case X86::R12: return 2;
+  case X86::EDX: case X86::R13: return 3;
+  case X86::EDI: case X86::R14: return 4;
+  case X86::ESI: case X86::R15: return 5;
+  case X86::EBP: case X86::RBP: return 6;
+  }
+
+  return -1;
+}
+
int X86RegisterInfo::getSEHRegNum(unsigned i) const {
  int reg = getX86RegNum(i);
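
The mapping above assigns each callee-saved register a small integer so that up to six saved registers fit in 3-bit fields of a compact unwind encoding word. A sketch of that packing under the assumption of consecutive 3-bit fields (the exact Darwin bit placement is not shown in this patch):

  #include <cstdint>

  // Illustrative packing of six compact-unwind register numbers (values
  // from getCompactUnwindRegNum above; -1 means "no register saved").
  static uint32_t packSavedRegs(const int RegNums[6]) {
    uint32_t Encoding = 0;
    for (int I = 0; I != 6; ++I)
      if (RegNums[I] >= 0)
        Encoding |= uint32_t(RegNums[I]) << (I * 3);
    return Encoding;
  }
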
@@ -495,18 +507,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    Reserved.set(X86::BPL);
  }

-  // Mark the x87 stack registers as reserved, since they don't behave normally
-  // with respect to liveness. We don't fully model the effects of x87 stack
-  // pushes and pops after stackification.
-  Reserved.set(X86::ST0);
-  Reserved.set(X86::ST1);
-  Reserved.set(X86::ST2);
-  Reserved.set(X86::ST3);
-  Reserved.set(X86::ST4);
-  Reserved.set(X86::ST5);
-  Reserved.set(X86::ST6);
-  Reserved.set(X86::ST7);
-
  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
@@ -517,13 +517,20 @@
  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
+    // These 8-bit registers are part of the x86-64 extension even though their
+    // super-registers are old 32-bits.
+    Reserved.set(X86::SIL);
+    Reserved.set(X86::DIL);
+    Reserved.set(X86::BPL);
+    Reserved.set(X86::SPL);
+
    for (unsigned n = 0; n != 8; ++n) {
+      // R8, R9, ...
      const unsigned GPR64[] = {
        X86::R8, X86::R9, X86::R10, X86::R11,
        X86::R12, X86::R13, X86::R14, X86::R15
      };
-      for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
-           ++AI)
+      for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
        Reserved.set(Reg);

      // XMM8, XMM9, ...
@@ -550,6 +557,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *F = MF.getFunction();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
  bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                              F->hasFnAttr(Attribute::StackAlignment));
@@ -608,7 +616,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
  bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
  int Opcode = I->getOpcode();
-  bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  DebugLoc DL = I->getDebugLoc();
  uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
  uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
@@ -625,16 +633,17 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
  // We need to keep the stack aligned properly. To do this, we round the
  // amount of space needed for the outgoing arguments up to the next
  // alignment boundary.
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
  Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;

  MachineInstr *New = 0;
-  if (Opcode == getCallFrameSetupOpcode()) {
+  if (Opcode == TII.getCallFrameSetupOpcode()) {
    New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr)
      .addReg(StackPtr)
      .addImm(Amount);
  } else {
-    assert(Opcode == getCallFrameDestroyOpcode());
+    assert(Opcode == TII.getCallFrameDestroyOpcode());

    // Factor out the amount the callee already popped.
    Amount -= CalleeAmt;
@@ -657,7 +666,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
    return;
  }

-  if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+  if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back. We do this until we have
    // more advanced stack pointer tracking ability.
@@ -667,6 +676,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
    // The EFLAGS implicit def is dead.
    New->getOperand(3).setIsDead();
+
+    // We are not tracking the stack pointer adjustment by the callee, so make
+    // sure we restore the stack pointer immediately after the call, there may
+    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+    MachineBasicBlock::iterator B = MBB.begin();
+    while (I != B && !llvm::prior(I)->getDesc().isCall())
+      --I;
    MBB.insert(I, New);
  }
}
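
The rounding in the hunk above is the standard round-up-to-multiple idiom. As a standalone function with example values:

  #include <cstdint>

  // Round Amount up to the next multiple of StackAlign, as done above.
  // e.g. roundUp(20, 16) == 32 and roundUp(32, 16) == 32.
  static uint64_t roundUp(uint64_t Amount, uint64_t StackAlign) {
    return (Amount + StackAlign - 1) / StackAlign * StackAlign;
  }
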
@@ -713,7 +729,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

  if (MI.getOperand(i+3).isImm()) {
    // Offset is a 32-bit integer.
-    int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
+    int Imm = (int)(MI.getOperand(i + 3).getImm());
+    int Offset = FIOffset + Imm;
+    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+           "Requesting 64-bit offset in 32-bit immediate!");
    MI.getOperand(i + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
@@ -910,8 +929,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
  }
}

-#include "X86GenRegisterInfo.inc"
-
namespace {
  struct MSAH : public MachineFunctionPass {
    static char ID;
@@ -920,10 +937,10 @@ namespace {
    virtual bool runOnMachineFunction(MachineFunction &MF) {
      const X86TargetMachine *TM =
        static_cast<const X86TargetMachine *>(&MF.getTarget());
-      const X86RegisterInfo *X86RI = TM->getRegisterInfo();
+      const TargetFrameLowering *TFI = TM->getFrameLowering();
      MachineRegisterInfo &RI = MF.getRegInfo();
      X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-      unsigned StackAlignment = X86RI->getStackAlignment();
+      unsigned StackAlignment = TFI->getStackAlignment();

      // Be over-conservative: scan over all vreg defs and find whether vector
      // registers are used. If yes, there is a possibility that vector register
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index dd3d3dcdcce5..a12eb1297f7e 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -15,7 +15,9 @@
#define X86REGISTERINFO_H

#include "llvm/Target/TargetRegisterInfo.h"
-#include "X86GenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"

namespace llvm {
  class Type;
@@ -56,10 +58,6 @@ private:
  ///
  unsigned SlotSize;

-  /// StackAlign - Default stack alignment.
-  ///
-  unsigned StackAlign;
-
  /// StackPtr - X86 physical register used as stack ptr.
  ///
  unsigned StackPtr;
@@ -75,8 +73,6 @@ public:
  /// register identifier.
  static unsigned getX86RegNum(unsigned RegNo);

-  unsigned getStackAlignment() const { return StackAlign; }
-
  /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
  /// (created by TableGen) for target dependencies.
  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
@@ -85,6 +81,10 @@ public:
  // FIXME: This should be tablegen'd like getDwarfRegNum is
  int getSEHRegNum(unsigned i) const;

+  /// getCompactUnwindRegNum - This function maps the register to the number for
+  /// compact unwind encoding. Return -1 if the register isn't valid.
+  int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
  /// Code Generation virtual methods...
  ///
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index f1d149c3fbc7..203722a66162 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -206,15 +206,22 @@ let Namespace = "X86" in {
  def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
  }
-  // Floating point stack registers
-  def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
-  def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
-  def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>;
-  def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>;
-  def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
-  def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
-  def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
-  def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+  class STRegister<string Name, list<Register> A> : Register<Name> {
+    let Aliases = A;
+  }
+
+  // Floating point stack registers. These don't map one-to-one to the FP
+  // pseudo registers, but we still mark them as aliasing FP registers. That
+  // way both kinds can be live without exceeding the stack depth. ST registers
+  // are only live around inline assembly.
+  def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
+  def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
+  def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
+  def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
+  def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
+  def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
+  def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
+  def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;

  // Status flags register
  def EFLAGS : Register<"flags">;
@@ -279,58 +286,23 @@ let Namespace = "X86" in {
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
-                        [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
-                         R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned X86_GR8_AO_64[] = {
-      X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
-      X86::R8B, X86::R9B, X86::R10B, X86::R11B,
-      X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
-    };
-
-    GR8Class::iterator
-    GR8Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return X86_GR8_AO_64;
-      else
-        return begin();
-    }
-
-    GR8Class::iterator
-    GR8Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
-      // Does the function dedicate RBP / EBP to being a frame ptr?
-      if (!Subtarget.is64Bit())
-        // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
-        return begin() + 8;
-      else if (TFI->hasFP(MF) || MFI->getReserveFP())
-        // If so, don't allocate SPL or BPL.
-        return array_endof(X86_GR8_AO_64) - 1;
-      else
-        // If not, just don't allocate SPL.
-        return array_endof(X86_GR8_AO_64);
-    }
+                        (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+                             R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+  let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+  let AltOrderSelect = [{
+    return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
  }];
}

def GR16 : RegisterClass<"X86", [i16], 16,
-                         [AX, CX, DX, SI, DI, BX, BP, SP,
-                          R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
+                         (add AX, CX, DX, SI, DI, BX, BP, SP,
+                              R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
}

def GR32 : RegisterClass<"X86", [i32], 32,
-                         [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
-                          R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+                         (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+                              R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
@@ -338,8 +310,8 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
-                         [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                          RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
+                         (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+                              RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
                       (GR16 sub_16bit),
                       (GR32 sub_32bit)];
@@ -348,16 +320,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;

// Debug registers.
-def DEBUG_REG : RegisterClass<"X86", [i32], 32,
-                              [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
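
The AltOrders/AltOrderSelect pair above replaces the hand-written allocation_order_begin/end bodies: TableGen now emits the alternative order, and the inline predicate picks it per function. A standalone C++ illustration of the mechanism (not the generated code itself):

  #include <string>
  #include <vector>

  // A register class keeps a default allocation order plus alternatives;
  // a predicate (the AltOrderSelect body) chooses one per function. Here
  // AltOrders[0] would be GR8 minus AH, BH, CH, DH for 64-bit mode.
  struct RegClassOrders {
    std::vector<std::string> Order;
    std::vector<std::vector<std::string> > AltOrders;
    const std::vector<std::string> &allocationOrder(bool Is64Bit) const {
      return Is64Bit ? AltOrders[0] : Order;
    }
  };
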

// Control registers.
-def CONTROL_REG : RegisterClass<"X86", [i64], 64,
-                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
-                                 CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;

// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -365,99 +334,69 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
-def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
  let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
-def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
                       (GR8_ABCD_H sub_8bit_hi),
                       (GR16_ABCD sub_16bit)];
}
-def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
                       (GR8_ABCD_H sub_8bit_hi),
                       (GR16_ABCD sub_16bit),
                       (GR32_ABCD sub_32bit)];
}
-def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
-def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
-                                               R8, R9, R11, RIP]> {
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+                                                   R8, R9, R11, RIP)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
                       (GR16 sub_16bit),
                       (GR32_TC sub_32bit)];
}

-def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
-                                                  R8, R9, R11]>;
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+                                                      R8, R9, R11)>;

// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
-                              [AL, CL, DL, AH, CH, DH, BL, BH]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // In 64-bit mode, it's not safe to blindly allocate H registers.
-    static const unsigned X86_GR8_NOREX_AO_64[] = {
-      X86::AL, X86::CL, X86::DL, X86::BL
-    };
-
-    GR8_NOREXClass::iterator
-    GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return X86_GR8_NOREX_AO_64;
-      else
-        return begin();
-    }
-
-    GR8_NOREXClass::iterator
-    GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return array_endof(X86_GR8_NOREX_AO_64);
-      else
-        return end();
-    }
+                              (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+  let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+  let AltOrderSelect = [{
+    return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
  }];
}

// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
-                               [AX, CX, DX, SI, DI, BX, BP, SP]> {
+                               (add AX, CX, DX, SI, DI, BX, BP, SP)> {
  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
}

// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
-                               [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+                               (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                       (GR16_NOREX sub_16bit)];
}

// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
-                               [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
+                               (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                       (GR16_NOREX sub_16bit),
                       (GR32_NOREX sub_32bit)];
}

// GR32_NOSP - GR32 registers except ESP.
-def GR32_NOSP : RegisterClass<"X86", [i32], 32,
-                              [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
-                               R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}

// GR64_NOSP - GR64 registers except RSP (and RIP).
-def GR64_NOSP : RegisterClass<"X86", [i64], 64,
-                              [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                               RBX, R14, R15, R12, R13, RBP]> {
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
                       (GR16 sub_16bit),
                       (GR32_NOSP sub_32bit)];
@@ -466,36 +405,30 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
-                                    [EAX, ECX, EDX, ESI, EDI, EBX, EBP]> {
+                                    (and GR32_NOREX, GR32_NOSP)> {
  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                       (GR16_NOREX sub_16bit)];
}

// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
-                                    [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
+                                    (and GR64_NOREX, GR64_NOSP)> {
  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                       (GR16_NOREX sub_16bit),
                       (GR32_NOREX_NOSP sub_32bit)];
}

// A class to support the 'A' assembler constraint: EAX then EDX.
-def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
                       (GR8_ABCD_H sub_8bit_hi),
                       (GR16_ABCD sub_16bit)];
}
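
The (add ...), (sub ...), (and ...), and (sequence ...) operators used above are set algebra evaluated by TableGen at build time, preserving member order. Their semantics, sketched in plain C++ on name lists:

  #include <algorithm>
  #include <string>
  #include <vector>

  typedef std::vector<std::string> RegList;

  // (sub A, B...): members of A not in B, keeping A's order.
  static RegList subOp(const RegList &A, const RegList &B) {
    RegList R;
    for (size_t I = 0; I != A.size(); ++I)
      if (std::find(B.begin(), B.end(), A[I]) == B.end())
        R.push_back(A[I]);
    return R;
  }

  // (sequence "DR%u", 0, 7): DR0, DR1, ..., DR7.
  static RegList sequenceOp(const std::string &Fmt, unsigned Lo, unsigned Hi) {
    RegList R;
    std::string Prefix = Fmt.substr(0, Fmt.find("%u"));
    for (unsigned I = Lo; I <= Hi; ++I)
      R.push_back(Prefix + std::to_string(I));
    return R;
  }
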

// Scalar SSE2 floating point registers.
-def FR32 : RegisterClass<"X86", [f32], 32,
-                         [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                          XMM8, XMM9, XMM10, XMM11,
-                          XMM12, XMM13, XMM14, XMM15]>;
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;

-def FR64 : RegisterClass<"X86", [f64], 64,
-                         [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                          XMM8, XMM9, XMM10, XMM11,
-                          XMM12, XMM13, XMM14, XMM15]>;
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;

// FIXME: This sets up the floating point register files as though they are f64
@@ -504,37 +437,31 @@ def FR64 : RegisterClass<"X86", [f64], 64,
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.

-def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;

// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
-def RST : RegisterClass<"X86", [f80, f64, f32], 32,
-                        [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
  let isAllocatable = 0;
}

// Generic vector registers: VR64 and VR128.
-def VR64: RegisterClass<"X86", [x86mmx], 64,
-                        [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
-def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
-                          [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                           XMM8, XMM9, XMM10, XMM11,
-                           XMM12, XMM13, XMM14, XMM15]> {
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                          128, (add FR32)> {
  let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
}
def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
-                          [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
-                           YMM8, YMM9, YMM10, YMM11,
-                           YMM12, YMM13, YMM14, YMM15]> {
+                          (sequence "YMM%u", 0, 15)> {
  let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}

// Status flags registers.
-def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
  let CopyCost = -1; // Don't allow copying of status registers.
  let isAllocatable = 0;
}
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 481e821030b3..5e6c659e5393 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the X86 specific subclass of TargetSubtarget.
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
-#include "X86GenSubtarget.inc"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
using namespace llvm;

#if defined(_MSC_VER)
@@ -154,7 +157,7 @@ const char *X86Subtarget::getBZeroEntry() const {
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
-  if (Is64Bit)
+  if (In64BitMode)
    return false;
  return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
@@ -170,73 +173,6 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
  return 200;
}

-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments. If we can't run cpuid on the host, return true.
-static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
-                            unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
-  #if defined(__GNUC__)
-    // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
-    asm ("movq\t%%rbx, %%rsi\n\t"
-         "cpuid\n\t"
-         "xchgq\t%%rbx, %%rsi\n\t"
-         : "=a" (*rEAX),
-           "=S" (*rEBX),
-           "=c" (*rECX),
-           "=d" (*rEDX)
-         : "a" (value));
-    return false;
-  #elif defined(_MSC_VER)
-    int registers[4];
-    __cpuid(registers, value);
-    *rEAX = registers[0];
-    *rEBX = registers[1];
-    *rECX = registers[2];
-    *rEDX = registers[3];
-    return false;
-  #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
-  #if defined(__GNUC__)
-    asm ("movl\t%%ebx, %%esi\n\t"
-         "cpuid\n\t"
-         "xchgl\t%%ebx, %%esi\n\t"
-         : "=a" (*rEAX),
-           "=S" (*rEBX),
-           "=c" (*rECX),
-           "=d" (*rEDX)
-         : "a" (value));
-    return false;
-  #elif defined(_MSC_VER)
-    __asm {
-      mov eax,value
-      cpuid
-      mov esi,rEAX
-      mov dword ptr [esi],eax
-      mov esi,rEBX
-      mov dword ptr [esi],ebx
-      mov esi,rECX
-      mov dword ptr [esi],ecx
-      mov esi,rEDX
-      mov dword ptr [esi],edx
-    }
-    return false;
-  #endif
-#endif
-  return true;
-}
-
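
The cpuid helper removed here moves to the shared X86_MC layer (it is called as X86_MC::GetCpuIDAndInfo below). On a modern GCC/Clang toolchain the same query can be made with the compiler's builtin, which also handles the EBX clobber the inline asm above works around; a minimal stand-in with the same true-on-failure convention:

  #include <cpuid.h> // GCC/Clang; MSVC uses __cpuid from <intrin.h>
  #include <cstdio>

  // Run CPUID leaf `value`; returns true if cpuid is unavailable.
  static bool getCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                              unsigned *rECX, unsigned *rEDX) {
    return !__get_cpuid(value, rEAX, rEBX, rECX, rEDX);
  }

  int main() {
    unsigned EAX, EBX, ECX, EDX;
    if (!getCpuIDAndInfo(0, &EAX, &EBX, &ECX, &EDX))
      std::printf("max basic cpuid leaf: %u\n", EAX);
    return 0;
  }
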
-static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
-  Family = (EAX >> 8) & 0xf; // Bits 8 - 11
-  Model = (EAX >> 4) & 0xf; // Bits 4 - 7
-  if (Family == 6 || Family == 0xf) {
-    if (Family == 0xf)
-      // Examine extended family ID if family ID is F.
-      Family += (EAX >> 20) & 0xff; // Bits 20 - 27
-    // Examine extended model ID if family ID is 6 or F.
-    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
-  }
-}
-
void X86Subtarget::AutoDetectSubtargetFeatures() {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  union {
@@ -244,50 +180,66 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
    char c[12];
  } text;

-  if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+  if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
    return;

-  GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+  X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);

-  if ((EDX >> 15) & 1) HasCMov = true;
-  if ((EDX >> 23) & 1) X86SSELevel = MMX;
-  if ((EDX >> 25) & 1) X86SSELevel = SSE1;
-  if ((EDX >> 26) & 1) X86SSELevel = SSE2;
-  if (ECX & 0x1)       X86SSELevel = SSE3;
-  if ((ECX >> 9) & 1)  X86SSELevel = SSSE3;
-  if ((ECX >> 19) & 1) X86SSELevel = SSE41;
-  if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+  if ((EDX >> 15) & 1) HasCMov = true;      ToggleFeature(X86::FeatureCMOV);
+  if ((EDX >> 23) & 1) X86SSELevel = MMX;   ToggleFeature(X86::FeatureMMX);
+  if ((EDX >> 25) & 1) X86SSELevel = SSE1;  ToggleFeature(X86::FeatureSSE1);
+  if ((EDX >> 26) & 1) X86SSELevel = SSE2;  ToggleFeature(X86::FeatureSSE2);
+  if (ECX & 0x1)       X86SSELevel = SSE3;  ToggleFeature(X86::FeatureSSE3);
+  if ((ECX >> 9) & 1)  X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);
+  if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);
+  if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);

  // FIXME: AVX codegen support is not ready.
-  //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
+  //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX);

  bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
  bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;

-  HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
-  HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
-  HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
-  HasAES = IsIntel && ((ECX >> 25) & 0x1);
+  HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);   ToggleFeature(X86::FeatureCLMUL);
+  HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);   ToggleFeature(X86::FeatureFMA3);
+  HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT);
+  HasAES = IsIntel && ((ECX >> 25) & 0x1);    ToggleFeature(X86::FeatureAES);

  if (IsIntel || IsAMD) {
    // Determine if bit test memory instructions are slow.
    unsigned Family = 0;
    unsigned Model = 0;
-    DetectFamilyModel(EAX, Family, Model);
-    IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+    X86_MC::DetectFamilyModel(EAX, Family, Model);
+    if (IsAMD || (Family == 6 && Model >= 13)) {
+      IsBTMemSlow = true;
+      ToggleFeature(X86::FeatureSlowBTMem);
+    }
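
Each test above reads one documented bit of the CPUID.1 result registers (CMOV is EDX bit 15, SSE4.2 is ECX bit 20, and so on). A small helper makes the pattern explicit:

  #include <cstdint>

  // True if bit `Bit` of a CPUID result register is set, as in the
  // feature tests above, e.g. cpuBit(EDX, 15) for CMOV or
  // cpuBit(ECX, 20) for SSE4.2.
  static bool cpuBit(uint32_t Reg, unsigned Bit) {
    return (Reg >> Bit) & 1;
  }
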

    // If it's Nehalem, unaligned memory access is fast.
-    if (Family == 15 && Model == 26)
+    if (Family == 15 && Model == 26) {
      IsUAMemFast = true;
+      ToggleFeature(X86::FeatureFastUAMem);
+    }

-    GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
-    HasX86_64 = (EDX >> 29) & 0x1;
-    HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
-    HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
+    X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+    if ((EDX >> 29) & 0x1) {
+      HasX86_64 = true;
+      ToggleFeature(X86::Feature64Bit);
+    }
+    if (IsAMD && ((ECX >> 6) & 0x1)) {
+      HasSSE4A = true;
+      ToggleFeature(X86::FeatureSSE4A);
+    }
+    if (IsAMD && ((ECX >> 16) & 0x1)) {
+      HasFMA4 = true;
+      ToggleFeature(X86::FeatureFMA4);
+    }
  }
}

-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
-                           bool is64Bit)
-  : PICStyle(PICStyles::None)
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS,
+                           unsigned StackAlignOverride, bool is64Bit)
+  : X86GenSubtargetInfo(TT, CPU, FS)
+  , PICStyle(PICStyles::None)
  , X86SSELevel(NoMMXSSE)
  , X863DNowLevel(NoThreeDNow)
  , HasCMov(false)
@@ -306,73 +258,66 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
  // FIXME: this is a known good value for Yonah. How about others?
  , MaxInlineSizeThreshold(128)
  , TargetTriple(TT)
-  , Is64Bit(is64Bit) {
-
-  // default to hard float ABI
-  if (FloatABIType == FloatABI::Default)
-    FloatABIType = FloatABI::Hard;
-
+  , In64BitMode(is64Bit) {
  // Determine default and user specified characteristics
-  if (!FS.empty()) {
+  if (!FS.empty() || !CPU.empty()) {
+    std::string CPUName = CPU;
+    if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+      CPUName = sys::getHostCPUName();
+#else
+      CPUName = "generic";
+#endif
+    }
+
+    // Make sure 64-bit features are available in 64-bit mode. (But make sure
+    // SSE2 can be turned off explicitly.)
+    std::string FullFS = FS;
+    if (In64BitMode) {
+      if (!FullFS.empty())
+        FullFS = "+64bit,+sse2," + FullFS;
+      else
+        FullFS = "+64bit,+sse2";
+    }
+
    // If feature string is not empty, parse features string.
-    std::string CPU = sys::getHostCPUName();
-    ParseSubtargetFeatures(FS, CPU);
-    // All X86-64 CPUs also have SSE2, however user might request no SSE via
-    // -mattr, so don't force SSELevel here.
-    if (HasAVX)
-      X86SSELevel = NoMMXSSE;
+    ParseSubtargetFeatures(CPUName, FullFS);
  } else {
    // Otherwise, use CPUID to auto-detect feature set.
    AutoDetectSubtargetFeatures();
-    // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
-    if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
-      X86SSELevel = SSE2;
-  }

-  // If requesting codegen for X86-64, make sure that 64-bit features
-  // are enabled.
-  if (Is64Bit) {
-    HasX86_64 = true;
+    // Make sure 64-bit features are available in 64-bit mode.
+    if (In64BitMode) {
+      HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+      HasCMov = true; ToggleFeature(X86::FeatureCMOV);

-    // All 64-bit cpus have cmov support.
-    HasCMov = true;
+      if (!HasAVX && X86SSELevel < SSE2) {
+        X86SSELevel = SSE2;
+        ToggleFeature(X86::FeatureSSE1);
+        ToggleFeature(X86::FeatureSSE2);
+      }
+    }
  }
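
The FullFS handling above force-enables 64bit and sse2 ahead of the user's feature string; because later entries override earlier ones, an explicit "-sse2" from the user still wins. The same composition as a standalone function:

  #include <string>

  // Mirrors the constructor logic above: prepend the 64-bit defaults so
  // user-specified features (e.g. "-sse2") can still override them.
  static std::string buildFullFS(const std::string &FS, bool In64BitMode) {
    if (!In64BitMode)
      return FS;
    return FS.empty() ? std::string("+64bit,+sse2")
                      : "+64bit,+sse2," + FS;
  }
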
+
+  // It's important to keep the MCSubtargetInfo feature bits in sync with
+  // target data structure which is shared with MC code emitter, etc.
+  if (In64BitMode)
+    ToggleFeature(X86::Mode64Bit);
+
+  if (HasAVX)
+    X86SSELevel = NoMMXSSE;

  DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
               << ", 3DNowLevel " << X863DNowLevel
               << ", 64bit " << HasX86_64 << "\n");
-  assert((!Is64Bit || HasX86_64) &&
+  assert((!In64BitMode || HasX86_64) &&
         "64-bit code requested on a subtarget that doesn't support it!");

  // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
  // 32 and 64 bit) and for all 64-bit targets.
-  if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
-      isTargetSolaris() || Is64Bit)
+  if (StackAlignOverride)
+    stackAlignment = StackAlignOverride;
+  else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+           isTargetSolaris() || In64BitMode)
    stackAlignment = 16;
-
-  if (StackAlignment)
-    stackAlignment = StackAlignment;
-}
-
-/// IsCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
-bool X86Subtarget::IsCalleePop(bool IsVarArg,
-                               CallingConv::ID CallingConv) const {
-  if (IsVarArg)
-    return false;
-
-  switch (CallingConv) {
-  default:
-    return false;
-  case CallingConv::X86_StdCall:
-    return !is64Bit();
-  case CallingConv::X86_FastCall:
-    return !is64Bit();
-  case CallingConv::X86_ThisCall:
-    return !is64Bit();
-  case CallingConv::Fast:
-    return GuaranteedTailCallOpt;
-  case CallingConv::GHC:
-    return GuaranteedTailCallOpt;
-  }
}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 286a7982a699..6d22027b7aa8 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the X86 specific subclass of TargetSubtarget.
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

@@ -15,12 +15,16 @@
#define X86SUBTARGET_H

#include "llvm/ADT/Triple.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/CallingConv.h"
#include <string>

+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
namespace llvm {
class GlobalValue;
+class StringRef;
class TargetMachine;

/// PICStyles - The X86 backend supports a number of different styles of PIC.
@@ -35,7 +39,7 @@ enum Style {
};
}

-class X86Subtarget : public TargetSubtarget {
+class X86Subtarget : public X86GenSubtargetInfo {
protected:
  enum X86SSEEnum {
    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
@@ -108,16 +112,17 @@ protected:
  Triple TargetTriple;

private:
-  /// Is64Bit - True if the processor supports 64-bit instructions and
-  /// pointer size is 64 bit.
-  bool Is64Bit;
+  /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+  bool In64BitMode;

public:

  /// This constructor initializes the data members to match that
  /// of the specified triple.
  ///
-  X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+  X86Subtarget(const std::string &TT, const std::string &CPU,
+               const std::string &FS,
+               unsigned StackAlignOverride, bool is64Bit);

  /// getStackAlignment - Returns the minimum alignment known to hold of the
  /// stack frame on entry to the function and which must be maintained by every
@@ -130,14 +135,13 @@ public:

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options. Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

  /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
  /// instruction.
  void AutoDetectSubtargetFeatures();

-  bool is64Bit() const { return Is64Bit; }
+  bool is64Bit() const { return In64BitMode; }

  PICStyles::Style getPICStyle() const { return PICStyle; }
  void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
@@ -195,7 +199,7 @@ public:
  }

  bool isTargetWin64() const {
-    return Is64Bit && (isTargetMingw() || isTargetWindows());
+    return In64BitMode && (isTargetMingw() || isTargetWindows());
  }

  bool isTargetEnvMacho() const {
@@ -203,7 +207,7 @@ public:
  }

  bool isTargetWin32() const {
-    return !Is64Bit && (isTargetMingw() || isTargetWindows());
+    return !In64BitMode && (isTargetMingw() || isTargetWindows());
  }

  bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
@@ -248,9 +252,6 @@ public:
  /// indicating the number of scheduling cycles of backscheduling that
  /// should be attempted.
  unsigned getSpecialAddressLatency() const;
-
-  /// IsCalleePop - Test whether a function should pop its own arguments.
-  bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
};

} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 74833291dc7a..9cab0e089098 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//

-#include "X86MCAsmInfo.h"
#include "X86TargetMachine.h"
#include "X86.h"
#include "llvm/PassManager.h"
@@ -24,22 +23,6 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;

-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
-  Triple TheTriple(TT);
-
-  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
-    if (TheTriple.getArch() == Triple::x86_64)
-      return new X86_64MCAsmInfoDarwin(TheTriple);
-    else
-      return new X86MCAsmInfoDarwin(TheTriple);
-  }
-
-  if (TheTriple.isOSWindows())
-    return new X86MCAsmInfoCOFF(TheTriple);
-
-  return new X86ELFMCAsmInfo(TheTriple);
-}
-
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                    MCContext &Ctx, TargetAsmBackend &TAB,
                                    raw_ostream &_OS,
@@ -62,15 +45,11 @@ extern "C" void LLVMInitializeX86Target() {
  RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
  RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);

-  // Register the target asm info.
-  RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
-  RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
-
  // Register the code emitter.
  TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
-                                      createX86_32MCCodeEmitter);
+                                      createX86MCCodeEmitter);
  TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
-                                      createX86_64MCCodeEmitter);
+                                      createX86MCCodeEmitter);

  // Register the asm backend.
  TargetRegistry::RegisterAsmBackend(TheX86_32Target,
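
ParseSubtargetFeatures (declared above, with its body generated by tblgen) consumes the usual comma-separated "+feat"/"-feat" string. A self-contained sketch of that convention, not the generated parser:

  #include <string>
  #include <utility>
  #include <vector>

  // Split "+64bit,+sse2,-3dnow" into (name, enabled) pairs; later
  // entries are intended to override earlier ones.
  static void parseFeatureString(
      const std::string &FS,
      std::vector<std::pair<std::string, bool> > &Out) {
    for (size_t Pos = 0; Pos < FS.size();) {
      size_t Comma = FS.find(',', Pos);
      if (Comma == std::string::npos) Comma = FS.size();
      std::string Tok = FS.substr(Pos, Comma - Pos);
      if (!Tok.empty() && (Tok[0] == '+' || Tok[0] == '-'))
        Out.push_back(std::make_pair(Tok.substr(1), Tok[0] == '+'));
      Pos = Comma + 1;
    }
  }
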
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : (getSubtargetImpl()->isTargetCygMing() || @@ -103,8 +83,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : X86TargetMachine(T, TT, FS, true), + : X86TargetMachine(T, TT, CPU, FS, true), DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), InstrInfo(*this), TSInfo(*this), @@ -115,9 +96,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, /// X86TargetMachine ctor - Create an X86 target. /// X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64Bit), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { DefRelocModel = getRelocationModel(); @@ -182,6 +164,10 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, // Finally, if we have "none" as our PIC style, force to static mode. if (Subtarget.getPICStyle() == PICStyles::None) setRelocationModel(Reloc::Static); + + // default to hard float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Hard; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 597392251e6a..885334a365fe 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -43,7 +43,8 @@ private: public: X86TargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { llvm_unreachable("getInstrInfo not implemented"); @@ -87,7 +88,7 @@ class X86_32TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_32TargetMachine(const Target &T, const std::string &M, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -113,7 +114,7 @@ class X86_64TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; |