diff options
author | Roman Divacky <rdivacky@FreeBSD.org> | 2010-01-15 15:37:28 +0000 |
---|---|---|
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2010-01-15 15:37:28 +0000 |
commit | 829000e035f46f2a227a5466e4e427a2f3cc00a9 (patch) | |
tree | be5a687969f682edded4aa6f13594ffd9aa9030e /lib/Target/X86 | |
parent | 1e7804dbd25b8dbf534c850355d70ad215206f4b (diff) | |
download | src-829000e035f46f2a227a5466e4e427a2f3cc00a9.tar.gz src-829000e035f46f2a227a5466e4e427a2f3cc00a9.zip |
Update LLVM to 93512.
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=202375
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 36 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 66 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 28 | ||||
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 20 | ||||
-rw-r--r-- | lib/Target/X86/README.txt | 83 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86CodeEmitter.cpp | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86FloatingPoint.cpp | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 157 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 241 | ||||
-rw-r--r-- | lib/Target/X86/X86Instr64bit.td | 94 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 73 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 75 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86JITInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.cpp | 9 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 7 |
20 files changed, 594 insertions, 376 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c357b4d0dee1..c4ae5d220b32 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" @@ -15,6 +16,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" @@ -46,7 +48,7 @@ private: /// @name Auto-generated Match Functions /// { - bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands, + bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); /// MatchRegisterName - Match the given string to a register name, or 0 if @@ -59,7 +61,8 @@ public: X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; @@ -71,7 +74,7 @@ namespace { /// X86Operand - Instances of this class represent a parsed X86 machine /// instruction. -struct X86Operand { +struct X86Operand : public MCParsedAsmOperand { enum { Token, Register, @@ -400,10 +403,11 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) { return false; } -bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { - SmallVector<X86Operand, 8> Operands; +bool X86ATTAsmParser:: +ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - Operands.push_back(X86Operand::CreateToken(Name)); + Operands.push_back(new X86Operand(X86Operand::CreateToken(Name))); SMLoc Loc = getLexer().getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -411,31 +415,27 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) { getLexer().Lex(); // Eat the star. - Operands.push_back(X86Operand::CreateToken("*")); + Operands.push_back(new X86Operand(X86Operand::CreateToken("*"))); } // Read the first operand. - Operands.push_back(X86Operand()); - if (ParseOperand(Operands.back())) + X86Operand Op; + if (ParseOperand(Op)) return true; + Operands.push_back(new X86Operand(Op)); + while (getLexer().is(AsmToken::Comma)) { getLexer().Lex(); // Eat the comma. // Parse and remember the operand. - Operands.push_back(X86Operand()); - if (ParseOperand(Operands.back())) + if (ParseOperand(Op)) return true; + Operands.push_back(new X86Operand(Op)); } } - if (!MatchInstruction(Operands, Inst)) - return false; - - // FIXME: We should give nicer diagnostics about the exact failure. - - Error(Loc, "unrecognized instruction"); - return true; + return false; } bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index b88063f9ce72..70c6dd03eb19 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -201,6 +201,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { + SmallString<128> TempNameStr; switch (MO.getType()) { default: llvm_unreachable("unknown symbol type!"); case MachineOperand::MO_JumpTableIndex: @@ -236,41 +237,38 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$non_lazy_ptr"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$non_lazy_ptr"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$stub"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } @@ -285,24 +283,32 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { break; } case MachineOperand::MO_ExternalSymbol: { - std::string Name = Mang->makeNameProper(MO.getSymbolName()); + const MCSymbol *SymToPrint; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - Name += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); + Mang->getNameWithPrefix(TempNameStr, + StringRef(MO.getSymbolName())+"$stub"); + const MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { - Name.erase(Name.end()-5, Name.end()); - StubSym = OutContext.GetOrCreateSymbol(StringRef(Name)); + TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } + SymToPrint = StubSym; + } else { + Mang->getNameWithPrefix(TempNameStr, MO.getSymbolName()); + SymToPrint = OutContext.GetOrCreateSymbol(TempNameStr.str()); } // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. - if (Name[0] == '$') - O << '(' << Name << ')'; - else - O << Name; + if (SymToPrint->getName()[0] != '$') + SymToPrint->print(O, MAI); + else { + O << '('; + SymToPrint->print(O, MAI); + O << '('; + } break; } } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 1015b6924734..9ee118cdfd93 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/DebugInfo.h" using namespace llvm; @@ -399,6 +400,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(X86::MOVZX32rm16); lower_subreg32(&OutMI, 0); break; + case X86::MOV16r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; + case X86::MOV64r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; } } @@ -412,6 +421,25 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case TargetInstrInfo::GC_LABEL: printLabel(MI); return; + case TargetInstrInfo::DEBUG_VALUE: { + if (!VerboseAsm) + return; + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason + DIVariable V((MDNode*)(MI->getOperand(2).getMetadata())); + O << V.getName(); + O << " <- "; + if (MI->getOperand(0).getType()==MachineOperand::MO_Register) + printOperand(MI, 0); + else { + assert(MI->getOperand(0).getType()==MachineOperand::MO_Immediate); + int64_t imm = MI->getOperand(0).getImm(); + O << '[' << ((imm<0) ? "EBP" : "ESP+") << imm << ']'; + } + O << "+"; + printOperand(MI, 1); + return; + } case TargetInstrInfo::INLINEASM: printInlineAsm(MI); return; diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 71ad51c7984e..0f3e44b52899 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -916,3 +916,23 @@ cheaper to do fld1 than load from a constant pool for example, so "load, add 1.0, store" is better done in the fp stack, etc. //===---------------------------------------------------------------------===// + +The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to +"cmpsd". For example, this code: + +double d1(double x) { return x == x ? x : x + x; } + +Compiles into: + +_d1: + ucomisd %xmm0, %xmm0 + jnp LBB1_2 + addsd %xmm0, %xmm0 + ret +LBB1_2: + ret + +Also, the 'ret's should be shared. This is PR6032. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index afd9f53ea6bb..aa7bb3d97889 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -530,7 +530,7 @@ We should inline lrintf and probably other libc functions. //===---------------------------------------------------------------------===// -Start using the flags more. For example, compile: +Use the FLAGS values from arithmetic instructions more. For example, compile: int add_zf(int *x, int y, int a, int b) { if ((*x += y) == 0) @@ -554,31 +554,8 @@ _add_zf: movl %ecx, %eax ret -and: - -int add_zf(int *x, int y, int a, int b) { - if ((*x + y) < 0) - return a; - else - return b; -} - -to: - -add_zf: - addl (%rdi), %esi - movl %edx, %eax - cmovns %ecx, %eax - ret - -instead of: - -_add_zf: - addl (%rdi), %esi - testl %esi, %esi - cmovs %edx, %ecx - movl %ecx, %eax - ret +As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll +without a test instruction. //===---------------------------------------------------------------------===// @@ -685,55 +662,6 @@ Though this probably isn't worth it. //===---------------------------------------------------------------------===// -We need to teach the codegen to convert two-address INC instructions to LEA -when the flags are dead (likewise dec). For example, on X86-64, compile: - -int foo(int A, int B) { - return A+1; -} - -to: - -_foo: - leal 1(%edi), %eax - ret - -instead of: - -_foo: - incl %edi - movl %edi, %eax - ret - -Another example is: - -;; X's live range extends beyond the shift, so the register allocator -;; cannot coalesce it with Y. Because of this, a copy needs to be -;; emitted before the shift to save the register value before it is -;; clobbered. However, this copy is not needed if the register -;; allocator turns the shift into an LEA. This also occurs for ADD. - -; Check that the shift gets turned into an LEA. -; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \ -; RUN: not grep {mov E.X, E.X} - -@G = external global i32 ; <i32*> [#uses=3] - -define i32 @test1(i32 %X, i32 %Y) { - %Z = add i32 %X, %Y ; <i32> [#uses=1] - volatile store i32 %Y, i32* @G - volatile store i32 %Z, i32* @G - ret i32 %X -} - -define i32 @test2(i32 %X) { - %Z = add i32 %X, 1 ; <i32> [#uses=1] - volatile store i32 %Z, i32* @G - ret i32 %X -} - -//===---------------------------------------------------------------------===// - Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with a neg instead of a sub instruction. Consider: @@ -854,11 +782,6 @@ __Z11no_overflowjj: //===---------------------------------------------------------------------===// -Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the -condition register is dead. xor reg reg is shorter than mov reg, #0. - -//===---------------------------------------------------------------------===// - The following code: bb114.preheader: ; preds = %cond_next94 diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a6e1ca3128ee..7919559058b1 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -23,6 +23,7 @@ include "llvm/Target/Target.td" def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", @@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add">; +def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", + "HasVectorUAMem", "true", + "Allow unaligned memory operands on vector/SIMD instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 4892e1746079..828e872cacbf 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -135,7 +135,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DEBUG(errs() << "JITTing function '" + DEBUG(dbgs() << "JITTing function '" << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); @@ -477,7 +477,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, template<class CodeEmitter> void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, const TargetInstrDesc *Desc) { - DEBUG(errs() << MI); + DEBUG(dbgs() << MI); MCE.processDebugLoc(MI.getDebugLoc(), true); @@ -618,11 +618,11 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(CurOp++); - DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n"); - DEBUG(errs() << "isMBB " << MO.isMBB() << "\n"); - DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n"); - DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n"); - DEBUG(errs() << "isImm " << MO.isImm() << "\n"); + DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(dbgs() << "isImm " << MO.isImm() << "\n"); if (MO.isMBB()) { emitPCRelativeBlockAddress(MO.getMBB()); @@ -843,7 +843,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (!Desc->isVariadic() && CurOp != NumOps) { #ifndef NDEBUG - errs() << "Cannot encode all operands of: " << MI << "\n"; + dbgs() << "Cannot encode all operands of: " << MI << "\n"; #endif llvm_unreachable(0); } @@ -1082,9 +1082,9 @@ public: } if (!OK) { - errs() << "couldn't convert inst '"; + dbgs() << "couldn't convert inst '"; MI.dump(); - errs() << "' to machine instr:\n"; + dbgs() << "' to machine instr:\n"; Instr->dump(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 431c120f8f0d..7e02d59c1bca 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType() == Type::getInt8Ty(I->getContext()) && - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) { + if (I->getType()->isInteger(8) && + I->getOperand(0)->getType()->isInteger(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -948,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType() == Type::getInt8Ty(I->getContext())) { + if (I->getType()->isInteger(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType() == Type::getInt16Ty(I->getContext())) { + } else if (I->getType()->isInteger(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType() == Type::getInt32Ty(I->getContext())) { + } else if (I->getType()->isInteger(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType() == Type::getInt64Ty(I->getContext())) { + } else if (I->getType()->isInteger(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1230,8 +1230,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) { CC != CallingConv::X86_FastCall) return false; - // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't - // handle this for now. + // fastcc with -tailcallopt is intended to provide a guaranteed + // tail call optimization. Fastisel doesn't know how to do that. if (CC == CallingConv::Fast && PerformTailCallOpt) return false; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 044bd4be322a..503ac146d27a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -75,12 +75,12 @@ namespace { unsigned StackTop; // The current top of the FP stack. void dumpStack() const { - errs() << "Stack contents:"; + dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { - errs() << " FP" << Stack[i]; + dbgs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } - errs() << "\n"; + dbgs() << "\n"; } private: /// isStackEmpty - Return true if the FP stack is empty. @@ -246,7 +246,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { PrevMI = prior(I); ++NumFP; // Keep track of # of pseudo instrs - DEBUG(errs() << "\nFPInst:\t" << *MI); + DEBUG(dbgs() << "\nFPInst:\t" << *MI); // Get dead variables list now because the MI pointer may be deleted as part // of processing! @@ -273,7 +273,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { unsigned Reg = DeadRegs[i]; if (Reg >= X86::FP0 && Reg <= X86::FP6) { - DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); + DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); } } @@ -282,13 +282,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DEBUG( MachineBasicBlock::iterator PrevI(PrevMI); if (I == PrevI) { - errs() << "Just deleted pseudo instruction\n"; + dbgs() << "Just deleted pseudo instruction\n"; } else { MachineBasicBlock::iterator Start = I; // Rewind to first instruction newly inserted. while (Start != BB.begin() && prior(Start) != PrevI) --Start; - errs() << "Inserted instructions:\n\t"; - Start->print(errs(), &MF.getTarget()); + dbgs() << "Inserted instructions:\n\t"; + Start->print(dbgs(), &MF.getTarget()); while (++Start != llvm::next(I)) {} } dumpStack(); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index cb8238377858..e2a53d1118b8 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -113,37 +113,37 @@ namespace { } void dump() { - errs() << "X86ISelAddressMode " << this << '\n'; - errs() << "Base.Reg "; + dbgs() << "X86ISelAddressMode " << this << '\n'; + dbgs() << "Base.Reg "; if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); else - errs() << "nul"; - errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << "nul"; + dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - errs() << "nul"; - errs() << " Disp " << Disp << '\n' + dbgs() << "nul"; + dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) GV->dump(); else - errs() << "nul"; - errs() << " CP "; + dbgs() << "nul"; + dbgs() << " CP "; if (CP) CP->dump(); else - errs() << "nul"; - errs() << '\n' + dbgs() << "nul"; + dbgs() << '\n' << "ES "; if (ES) - errs() << ES; + dbgs() << ES; else - errs() << "nul"; - errs() << " JT" << JT << " Align" << Align << '\n'; + dbgs() << "nul"; + dbgs() << " JT" << JT << " Align" << Align << '\n'; } }; } @@ -190,7 +190,7 @@ namespace { #include "X86GenDAGISel.inc" private: - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); @@ -201,19 +201,19 @@ namespace { bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); - bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDValue Op, SDValue Pred, + bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &InChain, SDValue &OutChain); - bool TryFoldLoad(SDValue P, SDValue N, + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -310,6 +310,11 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, if (U == Root) switch (U->getOpcode()) { default: break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::AND: + case X86ISD::XOR: + case X86ISD::OR: case ISD::ADD: case ISD::ADDC: case ISD::ADDE: @@ -675,12 +680,12 @@ void X86DAGToDAGISel::InstructionSelect() { // Codegen the basic block. #ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(dbgs() << "===== Instruction selection begins:\n"); Indent = 0; #endif SelectRoot(*CurDAG); #ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); #endif CurDAG->RemoveDeadNodes(); @@ -850,7 +855,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ - errs() << "MatchAddress: "; + dbgs() << "MatchAddress: "; AM.dump(); }); // Limit recursion. @@ -1268,7 +1273,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. -bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; @@ -1291,7 +1296,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, @@ -1302,7 +1307,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) { + IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { LoadSDNode *LD = cast<LoadSDNode>(InChain); if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; @@ -1333,7 +1338,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. -bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, +bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { X86ISelAddressMode AM; @@ -1395,10 +1400,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, } /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. -bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op.getOpcode() == X86ISD::TLSADDR); + assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1421,13 +1426,13 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, } -bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, +bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { if (ISD::isNON_EXTLoad(N.getNode()) && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + IsLegalAndProfitableToFold(N.getNode(), P, P)) return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); return false; } @@ -1454,7 +1459,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1480,7 +1485,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return 0; bool isInc = false, isDec = false, isSub = false, isCN = false; @@ -1678,8 +1683,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } -SDNode *X86DAGToDAGISel::Select(SDValue N) { - SDNode *Node = N.getNode(); +SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); @@ -1687,9 +1691,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent, ' ') << "Selecting: "; + dbgs() << std::string(Indent, ' ') << "Selecting: "; Node->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent += 2; #endif @@ -1697,9 +1701,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { if (Node->isMachineOpcode()) { #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "== "; + dbgs() << std::string(Indent-2, ' ') << "== "; Node->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent -= 2; #endif @@ -1767,10 +1771,10 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); // Multiply is commmutative. if (!foldedLoad) { - foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); if (foldedLoad) std::swap(N0, N1); } @@ -1793,21 +1797,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } // Copy the low half of the result, if it is needed. - if (!N.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + ReplaceUses(SDValue(Node, 0), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } // Copy the high half of the result, if it is needed. - if (!N.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { SDValue Result; if (HiReg == X86::AH && Subtarget->is64Bit()) { // Prevent use of AH in a REX instruction by referencing AX instead. @@ -1826,12 +1830,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { HiReg, NVT, InFlag); InFlag = Result.getValue(2); } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(SDValue(Node, 1), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } @@ -1869,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { unsigned LoReg, HiReg, ClrReg; unsigned ClrOpcode, SExtOpcode; - EVT ClrVT = NVT; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: @@ -1879,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32; + ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: @@ -1889,13 +1892,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = ~0U; // NOT USED. + ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; @@ -1903,7 +1906,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, @@ -1928,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode; - - if (NVT.getSimpleVT() == MVT::i64) { - ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), - 0); - // We just did a 32-bit clear, insert it into a 64-bit register to - // clear the whole 64-bit reg. - SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); - SDValue SubRegNo = - CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); - ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, - MVT::i64, Zero, ClrNode, SubRegNo), - 0); - } else { - ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0); - } - + SDValue ClrNode = + SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -1966,21 +1953,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } // Copy the division (low) result, if it is needed. - if (!N.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + ReplaceUses(SDValue(Node, 0), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } // Copy the remainder (high) result, if it is needed. - if (!N.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { SDValue Result; if (HiReg == X86::AH && Subtarget->is64Bit()) { // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2000,12 +1987,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { HiReg, NVT, InFlag); InFlag = Result.getValue(2); } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(SDValue(Node, 1), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } @@ -2124,16 +2111,16 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } } - SDNode *ResNode = SelectCode(N); + SDNode *ResNode = SelectCode(Node); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == N.getNode()) - N.getNode()->dump(CurDAG); + dbgs() << std::string(Indent-2, ' ') << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); else ResNode->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent -= 2; #endif @@ -2150,7 +2137,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, case 'v': // not offsetable ?? default: return true; case 'm': // memory - if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4)) + if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c722fbf648b4..228ec9f2d63d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -978,6 +978,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); @@ -2077,10 +2078,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, assert(((Callee.getOpcode() == ISD::Register && (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX || - cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) || + cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress) && - "Expecting an global address, external symbol, or register"); + "Expecting a global address, external symbol, or scratch register"); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); @@ -5610,13 +5611,21 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // because a TEST instruction will be better. bool NonFlagUse = false; for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::BRCOND && - UI->getOpcode() != ISD::SELECT && - UI->getOpcode() != ISD::SETCC) { + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look pass truncate. + UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { NonFlagUse = true; break; } + } if (!NonFlagUse) break; } @@ -5680,6 +5689,56 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } +/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node +/// if it's possible. +static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) { + SDValue LHS, RHS; + if (Op0.getOperand(1).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op010C = + dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) + if (Op010C->getZExtValue() == 1) { + LHS = Op0.getOperand(0); + RHS = Op0.getOperand(1).getOperand(1); + } + } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op000C = + dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) + if (Op000C->getZExtValue() == 1) { + LHS = Op0.getOperand(1); + RHS = Op0.getOperand(0).getOperand(1); + } + } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); + SDValue AndLHS = Op0.getOperand(0); + if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { + LHS = AndLHS.getOperand(0); + RHS = AndLHS.getOperand(1); + } + } + + if (LHS.getNode()) { + // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // instruction. Since the shift amount is in-range-or-undefined, we know + // that doing a bittest on the i16 value is ok. We extend to i32 because + // the encoding for the i16 version is larger than the i32 version. + if (LHS.getValueType() == MVT::i8) + LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. + if (LHS.getValueType() != RHS.getValueType()) + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); + + SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); + unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(Cond, MVT::i8), BT); + } + + return SDValue(); +} + SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); @@ -5687,6 +5746,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // Optimize to BT if possible. // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). @@ -5695,48 +5755,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(Op1)->getZExtValue() == 0 && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); - } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); - } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); - if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { - LHS = AndLHS.getOperand(0); - RHS = AndLHS.getOperand(1); - } - } - - if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT - // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because - // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) - LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - - // If the operand types disagree, extend the shift amount to match. Since - // BT ignores high bits (like shifts) we can use anyextend. - if (LHS.getValueType() != RHS.getValueType()) - RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); - - SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); - unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, MVT::i8), BT); - } + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; } bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); @@ -5936,6 +5957,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { } if (addTest) { + // Look pass the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); } @@ -6093,6 +6131,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { } if (addTest) { + // Look pass the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); } @@ -7524,8 +7579,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1 == Type::getInt32Ty(Ty1->getContext()) && - Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit(); + return Ty1->isInteger(64) && Ty2->isInteger(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7749,7 +7803,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, for (int i=0; i < 2 + X86AddrNumOperands; ++i) argOpers[i] = &bInstr->getOperand(i+2); - // x86 address has 4 operands: base, index, scale, and displacement + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -7777,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg()) .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); - unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); - unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); + // The subsequent operations should be using the destination registers of + //the PHI instructions. if (invSrc) { - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2); + t1 = F->getRegInfo().createVirtualRegister(RC); + t2 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg()); } else { - tt1 = t1; - tt2 = t2; + t1 = dest1Oper.getReg(); + t2 = dest2Oper.getReg(); } int valArgIndx = lastAddrIndx + 1; @@ -7798,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, else MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5); if (regOpcL != X86::MOV32rr) - MIB.addReg(tt1); + MIB.addReg(t1); (*MIB).addOperand(*argOpers[valArgIndx]); assert(argOpers[valArgIndx + 1]->isReg() == argOpers[valArgIndx]->isReg()); @@ -7809,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, else MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6); if (regOpcH != X86::MOV32rr) - MIB.addReg(tt2); + MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); @@ -9108,6 +9164,64 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT != MVT::i64 || !Subtarget->is64Bit()) + return SDValue(); + + // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + + SDValue ShAmt0 = N0.getOperand(1); + if (ShAmt0.getValueType() != MVT::i8) + return SDValue(); + SDValue ShAmt1 = N1.getOperand(1); + if (ShAmt1.getValueType() != MVT::i8) + return SDValue(); + if (ShAmt0.getOpcode() == ISD::TRUNCATE) + ShAmt0 = ShAmt0.getOperand(0); + if (ShAmt1.getOpcode() == ISD::TRUNCATE) + ShAmt1 = ShAmt1.getOperand(0); + + DebugLoc DL = N->getDebugLoc(); + unsigned Opc = X86ISD::SHLD; + SDValue Op0 = N0.getOperand(0); + SDValue Op1 = N1.getOperand(0); + if (ShAmt0.getOpcode() == ISD::SUB) { + Opc = X86ISD::SHRD; + std::swap(Op0, Op1); + std::swap(ShAmt0, ShAmt1); + } + + if (ShAmt1.getOpcode() == ISD::SUB) { + SDValue Sum = ShAmt1.getOperand(0); + if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { + if (SumC->getSExtValue() == 64 && + ShAmt1.getOperand(1) == ShAmt0) + return DAG.getNode(Opc, DL, VT, + Op0, Op1, + DAG.getNode(ISD::TRUNCATE, DL, + MVT::i8, ShAmt0)); + } + } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { + ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); + if (ShAmt0C && + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + return DAG.getNode(Opc, DL, VT, + N0.getOperand(0), N1.getOperand(0), + DAG.getNode(ISD::TRUNCATE, DL, + MVT::i8, ShAmt0)); + } + + return SDValue(); +} + /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -9370,6 +9484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); @@ -9423,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { std::string AsmStr = IA->getAsmString(); // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" - std::vector<std::string> AsmPieces; + SmallVector<StringRef, 4> AsmPieces; SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? switch (AsmPieces.size()) { @@ -9445,7 +9560,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType() == Type::getInt16Ty(CI->getContext()) && + if (CI->getType()->isInteger(16) && AsmPieces.size() == 3 && AsmPieces[0] == "rorw" && AsmPieces[1] == "$$8," && @@ -9455,12 +9570,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: - if (CI->getType() == Type::getInt64Ty(CI->getContext()) && + if (CI->getType()->isInteger(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 - std::vector<std::string> Words; + SmallVector<StringRef, 4> Words; SplitString(AsmPieces[0], Words, " \t"); if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { Words.clear(); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 65fbbdae9a7f..08e1dd1e060d 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; } // isTwoAddress def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // Alias Instructions //===----------------------------------------------------------------------===// -// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's -// equivalent due to implicit zero-extending, and it sometimes has a smaller -// encoding. +// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a +// smaller encoding, but doing so at isel time interferes with rematerialization +// in the current register allocator. For now, this is rewritten when the +// instruction is lowered to an MCInst. // FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove // when we have a better way to specify isel priority. -let AddedComplexity = 1 in -def : Pat<(i64 0), - (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>; - - -// Materialize i64 constant where top 32-bits are zero. +let Defs = [EFLAGS], + AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), + "", + [(set GR64:$dst, 0)]>; + +// Materialize i64 constant where top 32-bits are zero. This could theoretically +// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however +// that would make it more difficult to rematerialize. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)]>; @@ -1683,6 +1687,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, TB; @@ -1962,6 +1967,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000), def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; +// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it +// has an immediate with at least 32 bits of leading zeros, to avoid needing to +// materialize that immediate in a register first. +def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), + imm:$imm), + x86_subreg_32bit)>; + // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; @@ -2028,7 +2044,7 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; -def : Pat<(srl_su GR16:$src, (i8 8)), +def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), @@ -2098,24 +2114,7 @@ def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c) -def : Pat<(or (srl GR64:$src1, CL:$amt), - (shl GR64:$src2, (sub 64, CL:$amt))), - (SHRD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt), - (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), - (SHRD64mrCL addr:$dst, GR64:$src2)>; - -def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))), - (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - (SHRD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), - (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - addr:$dst), - (SHRD64mrCL addr:$dst, GR64:$src2)>; - +// Double shift patterns def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; @@ -2123,24 +2122,6 @@ def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) -def : Pat<(or (shl GR64:$src1, CL:$amt), - (srl GR64:$src2, (sub 64, CL:$amt))), - (SHLD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt), - (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), - (SHLD64mrCL addr:$dst, GR64:$src2)>; - -def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))), - (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - (SHLD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), - (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - addr:$dst), - (SHLD64mrCL addr:$dst, GR64:$src2)>; - def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; @@ -2148,6 +2129,19 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. +let AddedComplexity = 5 in { // Try this before the selecting to OR +def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (ADD64rr GR64:$src1, GR64:$src2)>; +} // AddedComplexity + // X86 specific add which produces a flag. def : Pat<(addc GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e555cd176cdf..7b39fb311cba 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -711,6 +712,62 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } } +bool +X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { + switch (MI.getOpcode()) { + default: break; + case X86::MOVSX16rr8: + case X86::MOVZX16rr8: + case X86::MOVSX32rr8: + case X86::MOVZX32rr8: + case X86::MOVSX64rr8: + case X86::MOVZX64rr8: + if (!TM.getSubtarget<X86Subtarget>().is64Bit()) + // It's not always legal to reference the low 8-bit of the larger + // register in 32-bit mode. + return false; + case X86::MOVSX32rr16: + case X86::MOVZX32rr16: + case X86::MOVSX64rr16: + case X86::MOVZX64rr16: + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: { + if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) + // Be conservative. + return false; + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + switch (MI.getOpcode()) { + default: + llvm_unreachable(0); + break; + case X86::MOVSX16rr8: + case X86::MOVZX16rr8: + case X86::MOVSX32rr8: + case X86::MOVZX32rr8: + case X86::MOVSX64rr8: + case X86::MOVZX64rr8: + SubIdx = 1; + break; + case X86::MOVSX32rr16: + case X86::MOVZX32rr16: + case X86::MOVSX64rr16: + case X86::MOVZX64rr16: + SubIdx = 3; + break; + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: + SubIdx = 4; + break; + } + return true; + } + } + return false; +} + /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame. bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, @@ -1018,12 +1075,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, switch (Opc) { default: break; case X86::MOV8r0: - case X86::MOV32r0: { + case X86::MOV16r0: + case X86::MOV32r0: + case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; + case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri; break; } Clone = false; } @@ -2290,8 +2351,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV32r0) + if (MI->getOpcode() == X86::MOV64r0) + NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); + else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + else if (MI->getOpcode() == X86::MOV16r0) + NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2354,7 +2419,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // No fusion if (PrintFailedFusing) - errs() << "We failed to fuse operand " << i << " in " << *MI; + dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } @@ -2559,7 +2624,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: + case X86::MOV16r0: case X86::MOV32r0: + case X86::MOV64r0: return true; default: break; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index b83441d89eff..0ab85f4f45b2 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -448,6 +448,16 @@ public: unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" + /// extension instruction. That is, it's like a copy where it's legal for the + /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns + /// true, then it's expected the pre-extension value is available as a subreg + /// of the result register. This also returns the sub-register index in + /// SubIdx. + virtual bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. This uses a heuristic so it isn't diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4d922a54ec2c..396cb53502ef 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInFlag]>; -def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>; +def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, + [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; -def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>; +def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, + [SDNPCommutative]>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; -def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>; -def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>; -def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>; +def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, + [SDNPCommutative]>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -487,6 +493,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); }]>; +// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + else { + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; + } +}]>; + // 'shld' and 'shrd' instruction patterns. Note that even though these have // the srl and shl in their patterns, the C++ code must still check for them, // because predicates are tested before children nodes are explored. @@ -3700,18 +3721,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "xor{b}\t$dst, $dst", [(set GR8:$dst, 0)]>; + +// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller +// encoding and avoids a partial-register update sometimes, but doing so +// at isel time interferes with rematerialization in the current register +// allocator. For now, this is rewritten when the instruction is lowered +// to an MCInst. +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", + [(set GR16:$dst, 0)]>, OpSize; def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "xor{l}\t$dst, $dst", [(set GR32:$dst, 0)]>; } -// Use xorl instead of xorw since we don't care about the high 16 bits, -// it's smaller, and it avoids a partial-register update. -let AddedComplexity = 1 in -def : Pat<(i16 0), - (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>; - //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // @@ -3792,7 +3816,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), +def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; @@ -3858,6 +3882,7 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), "cmpxchg8b\t$dst", []>, TB; @@ -4466,7 +4491,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; -def : Pat<(srl_su GR16:$src, (i8 8)), +def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), @@ -4640,6 +4665,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. +let AddedComplexity = 5 in { // Try this before the selecting to OR +def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (ADD32rr GR32:$src1, GR32:$src2)>; +} // AddedComplexity + //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b26e50869205..94b9b5543066 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr), // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to -// be naturally aligned on some targets but not on others. -// FIXME: Actually implement support for targets that don't require the -// alignment. This probably wants a subtarget predicate. +// be naturally aligned on some targets but not on others. If the subtarget +// allows unaligned accesses, match any load, though this may require +// setting a feature bit in the processor (on startup, for example). +// Opteron 10h and later implement such a feature. def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() >= 16; + return Subtarget->hasVectorUAMem() + || cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index c69cc83df6bb..f363903d9316 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -348,7 +348,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { #endif #if 0 - DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr + DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr << " ESP=" << (void*)StackPtr << ": Resolving call to function: " << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index d96aafda603a..9bd96af6c750 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -591,6 +591,15 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + // DEBUG_VALUE has a special representation, and is only robust enough to + // represent SP(or BP) +- offset addressing modes. We rewrite the + // FrameIndex to be a constant; implicitly positive constants are relative + // to ESP and negative ones to EBP. + if (MI.getOpcode()==TargetInstrInfo::DEBUG_VALUE) { + MI.getOperand(i).ChangeToImmediate(getFrameIndexOffset(MF, FrameIndex)); + return 0; + } + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 75cdbada1b5a..2039be7c9b3d 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) + , HasVectorUAMem(false) , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? @@ -317,7 +318,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (Is64Bit) HasX86_64 = true; - DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel + DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index ef6dbafac346..618dd102f32e 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -78,6 +78,10 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; + /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. + /// This may require setting a feature bit in the processor. + bool HasVectorUAMem; + /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. @@ -142,6 +146,7 @@ public: bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } + bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } @@ -169,7 +174,7 @@ public: p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; - else if (isTargetCygMing() || isTargetWindows()) + else if (isTargetMingw() || isTargetWindows()) p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; |