src - FreeBSD source tree

diff options


context:
space:
mode:

author	Roman Divacky <rdivacky@FreeBSD.org>	2010-01-15 15:37:28 +0000
committer	Roman Divacky <rdivacky@FreeBSD.org>	2010-01-15 15:37:28 +0000
commit	829000e035f46f2a227a5466e4e427a2f3cc00a9 (patch)
tree	be5a687969f682edded4aa6f13594ffd9aa9030e /lib/Target/X86
parent	1e7804dbd25b8dbf534c850355d70ad215206f4b (diff)
download	src-829000e035f46f2a227a5466e4e427a2f3cc00a9.tar.gz src-829000e035f46f2a227a5466e4e427a2f3cc00a9.zip

Update LLVM to 93512.

Notes

Notes: svn path=/vendor/llvm/dist/; revision=202375

Diffstat (limited to 'lib/Target/X86')

-rw-r--r--

lib/Target/X86/AsmParser/X86AsmParser.cpp

-rw-r--r--

lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp

-rw-r--r--

lib/Target/X86/AsmPrinter/X86MCInstLower.cpp

-rw-r--r--

lib/Target/X86/README-SSE.txt

-rw-r--r--

lib/Target/X86/README.txt

-rw-r--r--

lib/Target/X86/X86.td

-rw-r--r--

lib/Target/X86/X86CodeEmitter.cpp

-rw-r--r--

lib/Target/X86/X86FastISel.cpp

-rw-r--r--

lib/Target/X86/X86FloatingPoint.cpp

-rw-r--r--

lib/Target/X86/X86ISelDAGToDAG.cpp

157

-rw-r--r--

lib/Target/X86/X86ISelLowering.cpp

241

-rw-r--r--

lib/Target/X86/X86Instr64bit.td

-rw-r--r--

lib/Target/X86/X86InstrInfo.cpp

-rw-r--r--

lib/Target/X86/X86InstrInfo.h

-rw-r--r--

lib/Target/X86/X86InstrInfo.td

-rw-r--r--

lib/Target/X86/X86InstrSSE.td

-rw-r--r--

lib/Target/X86/X86JITInfo.cpp

-rw-r--r--

lib/Target/X86/X86RegisterInfo.cpp

-rw-r--r--

lib/Target/X86/X86Subtarget.cpp

-rw-r--r--

lib/Target/X86/X86Subtarget.h

20 files changed, 594 insertions, 376 deletions

diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c357b4d0dee1..c4ae5d220b32 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp

@@ -7,6 +7,7 @@

//===----------------------------------------------------------------------===//

+#include "llvm/Target/TargetAsmParser.h"

#include "X86.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Twine.h"

@@ -15,6 +16,7 @@

#include "llvm/MC/MCStreamer.h"

#include "llvm/MC/MCExpr.h"

#include "llvm/MC/MCInst.h"

+#include "llvm/MC/MCParsedAsmOperand.h"

#include "llvm/Support/SourceMgr.h"

#include "llvm/Target/TargetRegistry.h"

#include "llvm/Target/TargetAsmParser.h"

@@ -46,7 +48,7 @@ private:

/// @name Auto-generated Match Functions

/// {

- bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands,

+ bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,

MCInst &Inst);

/// MatchRegisterName - Match the given string to a register name, or 0 if

@@ -59,7 +61,8 @@ public:

X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)

: TargetAsmParser(T), Parser(_Parser) {}

- virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);

+ virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,

+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);

virtual bool ParseDirective(AsmToken DirectiveID);

};

@@ -71,7 +74,7 @@ namespace {

/// X86Operand - Instances of this class represent a parsed X86 machine

/// instruction operand.

-struct X86Operand {

+struct X86Operand : public MCParsedAsmOperand {

enum {

Token,

@@ -400,10 +403,11 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {

return false;

}

-bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {

- SmallVector<X86Operand, 8> Operands;

+bool X86ATTAsmParser::

+ParseInstruction(const StringRef &Name, SMLoc NameLoc,

+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {

- Operands.push_back(X86Operand::CreateToken(Name));

+ Operands.push_back(new X86Operand(X86Operand::CreateToken(Name)));

SMLoc Loc = getLexer().getTok().getLoc();

if (getLexer().isNot(AsmToken::EndOfStatement)) {

@@ -411,31 +415,27 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {

// Parse '*' modifier.

if (getLexer().is(AsmToken::Star)) {

getLexer().Lex(); // Eat the star.

- Operands.push_back(X86Operand::CreateToken("*"));

+ Operands.push_back(new X86Operand(X86Operand::CreateToken("*")));

}

// Read the first operand.

- Operands.push_back(X86Operand());

- if (ParseOperand(Operands.back()))

+ X86Operand Op;

+ if (ParseOperand(Op))

return true;

+ Operands.push_back(new X86Operand(Op));

while (getLexer().is(AsmToken::Comma)) {

getLexer().Lex(); // Eat the comma.

// Parse and remember the operand.

- Operands.push_back(X86Operand());

- if (ParseOperand(Operands.back()))

+ if (ParseOperand(Op))

return true;

+ Operands.push_back(new X86Operand(Op));

}

- if (!MatchInstruction(Operands, Inst))

- return false;

- // FIXME: We should give nicer diagnostics about the exact failure.

- Error(Loc, "unrecognized instruction");

- return true;

+ return false;

}

bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {

diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index b88063f9ce72..70c6dd03eb19 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp

@@ -201,6 +201,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {

/// jump tables, constant pools, global address and external symbols, all of

/// which print to a label with various suffixes for relocation types etc.

void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {

+ SmallString<128> TempNameStr;

switch (MO.getType()) {

default: llvm_unreachable("unknown symbol type!");

case MachineOperand::MO_JumpTableIndex:

@@ -236,41 +237,38 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {

if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||

MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {

- SmallString<128> NameStr;

- Mang->getNameWithPrefix(NameStr, GV, true);

- NameStr += "$non_lazy_ptr";

- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());

+ Mang->getNameWithPrefix(TempNameStr, GV, true);

+ TempNameStr += "$non_lazy_ptr";

+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());

const MCSymbol *&StubSym =

MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);

if (StubSym == 0) {

- NameStr.clear();

- Mang->getNameWithPrefix(NameStr, GV, false);

- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());

+ TempNameStr.clear();

+ Mang->getNameWithPrefix(TempNameStr, GV, false);

+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());

}

} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){

- SmallString<128> NameStr;

- Mang->getNameWithPrefix(NameStr, GV, true);

- NameStr += "$non_lazy_ptr";

- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());

+ Mang->getNameWithPrefix(TempNameStr, GV, true);

+ TempNameStr += "$non_lazy_ptr";

+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());

const MCSymbol *&StubSym =

MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);

if (StubSym == 0) {

- NameStr.clear();

- Mang->getNameWithPrefix(NameStr, GV, false);

- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());

+ TempNameStr.clear();

+ Mang->getNameWithPrefix(TempNameStr, GV, false);

+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());

}

} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {

- SmallString<128> NameStr;

- Mang->getNameWithPrefix(NameStr, GV, true);

- NameStr += "$stub";

- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());

+ Mang->getNameWithPrefix(TempNameStr, GV, true);

+ TempNameStr += "$stub";

+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());

const MCSymbol *&StubSym =

MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);

if (StubSym == 0) {

- NameStr.clear();

- Mang->getNameWithPrefix(NameStr, GV, false);

- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());

+ TempNameStr.clear();

+ Mang->getNameWithPrefix(TempNameStr, GV, false);

+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());

}

@@ -285,24 +283,32 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {

break;

}

case MachineOperand::MO_ExternalSymbol: {

- std::string Name = Mang->makeNameProper(MO.getSymbolName());

+ const MCSymbol *SymToPrint;

if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {

- Name += "$stub";

- MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name));

+ Mang->getNameWithPrefix(TempNameStr,

+ StringRef(MO.getSymbolName())+"$stub");

+ const MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());

const MCSymbol *&StubSym =

MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);

if (StubSym == 0) {

- Name.erase(Name.end()-5, Name.end());

- StubSym = OutContext.GetOrCreateSymbol(StringRef(Name));

+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());

+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());

}

+ SymToPrint = StubSym;

+ } else {

+ Mang->getNameWithPrefix(TempNameStr, MO.getSymbolName());

+ SymToPrint = OutContext.GetOrCreateSymbol(TempNameStr.str());

}

// If the name begins with a dollar-sign, enclose it in parens. We do this

// to avoid having it look like an integer immediate to the assembler.

- if (Name[0] == '$')

- O << '(' << Name << ')';

- else

- O << Name;

+ if (SymToPrint->getName()[0] != '$')

+ SymToPrint->print(O, MAI);

+ else {

+ O << '(';

+ SymToPrint->print(O, MAI);

+ O << ')';

+ }

break;

}

diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 1015b6924734..9ee118cdfd93 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp

@@ -25,6 +25,7 @@

#include "llvm/Support/FormattedStream.h"

#include "llvm/Support/Mangler.h"

#include "llvm/ADT/SmallString.h"

+#include "llvm/Analysis/DebugInfo.h"

using namespace llvm;

@@ -399,6 +400,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {

OutMI.setOpcode(X86::MOVZX32rm16);

lower_subreg32(&OutMI, 0);

break;

+ case X86::MOV16r0:

+ OutMI.setOpcode(X86::MOV32r0);

+ lower_subreg32(&OutMI, 0);

+ break;

+ case X86::MOV64r0:

+ OutMI.setOpcode(X86::MOV32r0);

+ lower_subreg32(&OutMI, 0);

+ break;

}

@@ -412,6 +421,25 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {

case TargetInstrInfo::GC_LABEL:

printLabel(MI);

return;

+ case TargetInstrInfo::DEBUG_VALUE: {

+ if (!VerboseAsm)

+ return;

+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";

+ // cast away const; DIetc do not take const operands for some reason

+ DIVariable V((MDNode*)(MI->getOperand(2).getMetadata()));

+ O << V.getName();

+ O << " <- ";

+ if (MI->getOperand(0).getType()==MachineOperand::MO_Register)

+ printOperand(MI, 0);

+ else {

+ assert(MI->getOperand(0).getType()==MachineOperand::MO_Immediate);

+ int64_t imm = MI->getOperand(0).getImm();

+ O << '[' << ((imm<0) ? "EBP" : "ESP+") << imm << ']';

+ }

+ O << "+";

+ printOperand(MI, 1);

+ return;

+ }

case TargetInstrInfo::INLINEASM:

printInlineAsm(MI);

return;

diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 71ad51c7984e..0f3e44b52899 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt

@@ -916,3 +916,23 @@ cheaper to do fld1 than load from a constant pool for example, so

"load, add 1.0, store" is better done in the fp stack, etc.

//===---------------------------------------------------------------------===//

+The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to

+"cmpsd". For example, this code:

+double d1(double x) { return x == x ? x : x + x; }

+Compiles into:

+_d1:

+ ucomisd %xmm0, %xmm0

+ jnp LBB1_2

+ addsd %xmm0, %xmm0

+ ret

+LBB1_2:

+ ret

+Also, the 'ret's should be shared. This is PR6032.

+//===---------------------------------------------------------------------===//

diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index afd9f53ea6bb..aa7bb3d97889 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt

@@ -530,7 +530,7 @@ We should inline lrintf and probably other libc functions.

//===---------------------------------------------------------------------===//

-Start using the flags more. For example, compile:

+Use the FLAGS values from arithmetic instructions more. For example, compile:

int add_zf(int *x, int y, int a, int b) {

if ((*x += y) == 0)

@@ -554,31 +554,8 @@ _add_zf:

movl %ecx, %eax

ret

-and:

-int add_zf(int *x, int y, int a, int b) {

- if ((*x + y) < 0)

- return a;

- else

- return b;

-to:

-add_zf:

- addl (%rdi), %esi

- movl %edx, %eax

- cmovns %ecx, %eax

- ret

-instead of:

-_add_zf:

- addl (%rdi), %esi

- testl %esi, %esi

- cmovs %edx, %ecx

- movl %ecx, %eax

- ret

+As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll

+without a test instruction.

//===---------------------------------------------------------------------===//

@@ -685,55 +662,6 @@ Though this probably isn't worth it.

//===---------------------------------------------------------------------===//

-We need to teach the codegen to convert two-address INC instructions to LEA

-when the flags are dead (likewise dec). For example, on X86-64, compile:

-int foo(int A, int B) {

- return A+1;

-to:

-_foo:

- leal 1(%edi), %eax

- ret

-instead of:

-_foo:

- incl %edi

- movl %edi, %eax

- ret

-Another example is:

-;; X's live range extends beyond the shift, so the register allocator

-;; cannot coalesce it with Y. Because of this, a copy needs to be

-;; emitted before the shift to save the register value before it is

-;; clobbered. However, this copy is not needed if the register

-;; allocator turns the shift into an LEA. This also occurs for ADD.

-; Check that the shift gets turned into an LEA.

-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \

-; RUN: not grep {mov E.X, E.X}

-@G = external global i32 ; <i32*> [#uses=3]

-define i32 @test1(i32 %X, i32 %Y) {

- %Z = add i32 %X, %Y ; <i32> [#uses=1]

- volatile store i32 %Y, i32* @G

- volatile store i32 %Z, i32* @G

- ret i32 %X

-define i32 @test2(i32 %X) {

- %Z = add i32 %X, 1 ; <i32> [#uses=1]

- volatile store i32 %Z, i32* @G

- ret i32 %X

-//===---------------------------------------------------------------------===//

Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with

a neg instead of a sub instruction. Consider:

@@ -854,11 +782,6 @@ __Z11no_overflowjj:

//===---------------------------------------------------------------------===//

-Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the

-condition register is dead. xor reg reg is shorter than mov reg, #0.

-//===---------------------------------------------------------------------===//

The following code:

bb114.preheader: ; preds = %cond_next94

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a6e1ca3128ee..7919559058b1 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td

@@ -23,6 +23,7 @@ include "llvm/Target/Target.td"

def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",

"Enable conditional move instructions">;

def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",

"Enable MMX instructions">;

def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",

@@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",

"Enable three-operand fused multiple-add">;

def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",

"Enable four-operand fused multiple-add">;

+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",

+ "HasVectorUAMem", "true",

+ "Allow unaligned memory operands on vector/SIMD instructions">;

//===----------------------------------------------------------------------===//

// X86 processors supported.

diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4892e1746079..828e872cacbf 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp

@@ -135,7 +135,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {

IsPIC = TM.getRelocationModel() == Reloc::PIC_;

do {

- DEBUG(errs() << "JITTing function '"

+ DEBUG(dbgs() << "JITTing function '"

<< MF.getFunction()->getName() << "'\n");

MCE.startFunction(MF);

for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();

@@ -477,7 +477,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,

template<class CodeEmitter>

void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,

const TargetInstrDesc *Desc) {

- DEBUG(errs() << MI);

+ DEBUG(dbgs() << MI);

MCE.processDebugLoc(MI.getDebugLoc(), true);

@@ -618,11 +618,11 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,

const MachineOperand &MO = MI.getOperand(CurOp++);

- DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n");

- DEBUG(errs() << "isMBB " << MO.isMBB() << "\n");

- DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n");

- DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n");

- DEBUG(errs() << "isImm " << MO.isImm() << "\n");

+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");

+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");

+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");

+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");

+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");

if (MO.isMBB()) {

emitPCRelativeBlockAddress(MO.getMBB());

@@ -843,7 +843,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,

if (!Desc->isVariadic() && CurOp != NumOps) {

#ifndef NDEBUG

- errs() << "Cannot encode all operands of: " << MI << "\n";

+ dbgs() << "Cannot encode all operands of: " << MI << "\n";

#endif

llvm_unreachable(0);

}

@@ -1082,9 +1082,9 @@ public:

}

if (!OK) {

- errs() << "couldn't convert inst '";

+ dbgs() << "couldn't convert inst '";

MI.dump();

- errs() << "' to machine instr:\n";

+ dbgs() << "' to machine instr:\n";

Instr->dump();

}

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 431c120f8f0d..7e02d59c1bca 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp

@@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {

bool X86FastISel::X86SelectZExt(Instruction *I) {

// Handle zero-extension from i1 to i8, which is common.

- if (I->getType() == Type::getInt8Ty(I->getContext()) &&

- I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) {

+ if (I->getType()->isInteger(8) &&

+ I->getOperand(0)->getType()->isInteger(1)) {

unsigned ResultReg = getRegForValue(I->getOperand(0));

if (ResultReg == 0) return false;

// Set the high bits to zero.

@@ -948,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {

bool X86FastISel::X86SelectShift(Instruction *I) {

unsigned CReg = 0, OpReg = 0, OpImm = 0;

const TargetRegisterClass *RC = NULL;

- if (I->getType() == Type::getInt8Ty(I->getContext())) {

+ if (I->getType()->isInteger(8)) {

CReg = X86::CL;

RC = &X86::GR8RegClass;

switch (I->getOpcode()) {

@@ -957,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {

case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;

default: return false;

}

- } else if (I->getType() == Type::getInt16Ty(I->getContext())) {

+ } else if (I->getType()->isInteger(16)) {

CReg = X86::CX;

RC = &X86::GR16RegClass;

switch (I->getOpcode()) {

@@ -966,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {

case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;

default: return false;

}

- } else if (I->getType() == Type::getInt32Ty(I->getContext())) {

+ } else if (I->getType()->isInteger(32)) {

CReg = X86::ECX;

RC = &X86::GR32RegClass;

switch (I->getOpcode()) {

@@ -975,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {

case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;

default: return false;

}

- } else if (I->getType() == Type::getInt64Ty(I->getContext())) {

+ } else if (I->getType()->isInteger(64)) {

CReg = X86::RCX;

RC = &X86::GR64RegClass;

switch (I->getOpcode()) {

@@ -1230,8 +1230,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) {

CC != CallingConv::X86_FastCall)

return false;

- // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't

- // handle this for now.

+ // fastcc with -tailcallopt is intended to provide a guaranteed

+ // tail call optimization. Fastisel doesn't know how to do that.

if (CC == CallingConv::Fast && PerformTailCallOpt)

return false;

diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 044bd4be322a..503ac146d27a 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp

@@ -75,12 +75,12 @@ namespace {

unsigned StackTop; // The current top of the FP stack.

void dumpStack() const {

- errs() << "Stack contents:";

+ dbgs() << "Stack contents:";

for (unsigned i = 0; i != StackTop; ++i) {

- errs() << " FP" << Stack[i];

+ dbgs() << " FP" << Stack[i];

assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");

}

- errs() << "\n";

+ dbgs() << "\n";

}

private:

/// isStackEmpty - Return true if the FP stack is empty.

@@ -246,7 +246,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {

PrevMI = prior(I);

++NumFP; // Keep track of # of pseudo instrs

- DEBUG(errs() << "\nFPInst:\t" << *MI);

+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);

// Get dead variables list now because the MI pointer may be deleted as part

// of processing!

@@ -273,7 +273,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {

for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {

unsigned Reg = DeadRegs[i];

if (Reg >= X86::FP0 && Reg <= X86::FP6) {

- DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");

+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");

freeStackSlotAfter(I, Reg-X86::FP0);

}

@@ -282,13 +282,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {

DEBUG(

MachineBasicBlock::iterator PrevI(PrevMI);

if (I == PrevI) {

- errs() << "Just deleted pseudo instruction\n";

+ dbgs() << "Just deleted pseudo instruction\n";

} else {

MachineBasicBlock::iterator Start = I;

// Rewind to first instruction newly inserted.

while (Start != BB.begin() && prior(Start) != PrevI) --Start;

- errs() << "Inserted instructions:\n\t";

- Start->print(errs(), &MF.getTarget());

+ dbgs() << "Inserted instructions:\n\t";

+ Start->print(dbgs(), &MF.getTarget());

while (++Start != llvm::next(I)) {}

}

dumpStack();

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index cb8238377858..e2a53d1118b8 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp

@@ -113,37 +113,37 @@ namespace {

}

void dump() {

- errs() << "X86ISelAddressMode " << this << '\n';

- errs() << "Base.Reg ";

+ dbgs() << "X86ISelAddressMode " << this << '\n';

+ dbgs() << "Base.Reg ";

if (Base.Reg.getNode() != 0)

Base.Reg.getNode()->dump();

else

- errs() << "nul";

- errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'

+ dbgs() << "nul";

+ dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n'

<< " Scale" << Scale << '\n'

<< "IndexReg ";

if (IndexReg.getNode() != 0)

IndexReg.getNode()->dump();

else

- errs() << "nul";

- errs() << " Disp " << Disp << '\n'

+ dbgs() << "nul";

+ dbgs() << " Disp " << Disp << '\n'

<< "GV ";

if (GV)

GV->dump();

else

- errs() << "nul";

- errs() << " CP ";

+ dbgs() << "nul";

+ dbgs() << " CP ";

if (CP)

CP->dump();

else

- errs() << "nul";

- errs() << '\n'

+ dbgs() << "nul";

+ dbgs() << '\n'

<< "ES ";

if (ES)

- errs() << ES;

+ dbgs() << ES;

else

- errs() << "nul";

- errs() << " JT" << JT << " Align" << Align << '\n';

+ dbgs() << "nul";

+ dbgs() << " JT" << JT << " Align" << Align << '\n';

}

};

}

@@ -190,7 +190,7 @@ namespace {

#include "X86GenDAGISel.inc"

private:

- SDNode *Select(SDValue N);

+ SDNode *Select(SDNode *N);

SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);

SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);

@@ -201,19 +201,19 @@ namespace {

bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,

unsigned Depth);

bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);

- bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,

+ bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,

SDValue &Scale, SDValue &Index, SDValue &Disp,

SDValue &Segment);

- bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,

+ bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,

SDValue &Scale, SDValue &Index, SDValue &Disp);

- bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,

+ bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,

SDValue &Scale, SDValue &Index, SDValue &Disp);

- bool SelectScalarSSELoad(SDValue Op, SDValue Pred,

+ bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,

SDValue N, SDValue &Base, SDValue &Scale,

SDValue &Index, SDValue &Disp,

SDValue &Segment,

SDValue &InChain, SDValue &OutChain);

- bool TryFoldLoad(SDValue P, SDValue N,

+ bool TryFoldLoad(SDNode *P, SDValue N,

SDValue &Base, SDValue &Scale,

SDValue &Index, SDValue &Disp,

SDValue &Segment);

@@ -310,6 +310,11 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,

if (U == Root)

switch (U->getOpcode()) {

default: break;

+ case X86ISD::ADD:

+ case X86ISD::SUB:

+ case X86ISD::AND:

+ case X86ISD::XOR:

+ case X86ISD::OR:

case ISD::ADD:

case ISD::ADDC:

case ISD::ADDE:

@@ -675,12 +680,12 @@ void X86DAGToDAGISel::InstructionSelect() {

// Codegen the basic block.

#ifndef NDEBUG

- DEBUG(errs() << "===== Instruction selection begins:\n");

+ DEBUG(dbgs() << "===== Instruction selection begins:\n");

Indent = 0;

#endif

SelectRoot(*CurDAG);

#ifndef NDEBUG

- DEBUG(errs() << "===== Instruction selection ends:\n");

+ DEBUG(dbgs() << "===== Instruction selection ends:\n");

#endif

CurDAG->RemoveDeadNodes();

@@ -850,7 +855,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,

bool is64Bit = Subtarget->is64Bit();

DebugLoc dl = N.getDebugLoc();

DEBUG({

- errs() << "MatchAddress: ";

+ dbgs() << "MatchAddress: ";

AM.dump();

});

// Limit recursion.

@@ -1268,7 +1273,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {

/// SelectAddr - returns true if it is able to pattern match an addressing mode.

/// It returns the operands which make up the maximal addressing mode it can

/// match by reference.

-bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,

+bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,

SDValue &Scale, SDValue &Index,

SDValue &Disp, SDValue &Segment) {

X86ISelAddressMode AM;

@@ -1291,7 +1296,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,

/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to

/// match a load whose top elements are either undef or zeros. The load flavor

/// is derived from the type of N, which is either v4f32 or v2f64.

-bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,

+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,

SDValue N, SDValue &Base,

SDValue &Scale, SDValue &Index,

SDValue &Disp, SDValue &Segment,

@@ -1302,7 +1307,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,

if (ISD::isNON_EXTLoad(InChain.getNode()) &&

InChain.getValue(0).hasOneUse() &&

N.hasOneUse() &&

- IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {

+ IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) {

LoadSDNode *LD = cast<LoadSDNode>(InChain);

if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))

return false;

@@ -1333,7 +1338,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,

/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing

/// mode it matches can be cost effectively emitted as an LEA instruction.

-bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,

+bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,

SDValue &Base, SDValue &Scale,

SDValue &Index, SDValue &Disp) {

X86ISelAddressMode AM;

@@ -1395,10 +1400,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,

}

/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.

-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,

+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,

SDValue &Scale, SDValue &Index,

SDValue &Disp) {

- assert(Op.getOpcode() == X86ISD::TLSADDR);

+ assert(Op->getOpcode() == X86ISD::TLSADDR);

assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);

const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

@@ -1421,13 +1426,13 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,

}

-bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,

+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,

SDValue &Base, SDValue &Scale,

SDValue &Index, SDValue &Disp,

SDValue &Segment) {

if (ISD::isNON_EXTLoad(N.getNode()) &&

N.hasOneUse() &&

- IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))

+ IsLegalAndProfitableToFold(N.getNode(), P, P))

return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);

return false;

}

@@ -1454,7 +1459,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {

SDValue In2L = Node->getOperand(2);

SDValue In2H = Node->getOperand(3);

SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;

- if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))

+ if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))

return NULL;

MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);

MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();

@@ -1480,7 +1485,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {

SDValue Ptr = Node->getOperand(1);

SDValue Val = Node->getOperand(2);

SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;

- if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))

+ if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))

return 0;

bool isInc = false, isDec = false, isSub = false, isCN = false;

@@ -1678,8 +1683,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) {

return true;

}

-SDNode *X86DAGToDAGISel::Select(SDValue N) {

- SDNode *Node = N.getNode();

+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {

EVT NVT = Node->getValueType(0);

unsigned Opc, MOpc;

unsigned Opcode = Node->getOpcode();

@@ -1687,9 +1691,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent, ' ') << "Selecting: ";

+ dbgs() << std::string(Indent, ' ') << "Selecting: ";

Node->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

Indent += 2;

#endif

@@ -1697,9 +1701,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

if (Node->isMachineOpcode()) {

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent-2, ' ') << "== ";

+ dbgs() << std::string(Indent-2, ' ') << "== ";

Node->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

Indent -= 2;

#endif

@@ -1767,10 +1771,10 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

}

SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;

- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);

+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);

// Multiply is commutative.

if (!foldedLoad) {

- foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);

+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);

if (foldedLoad)

std::swap(N0, N1);

}

@@ -1793,21 +1797,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

}

// Copy the low half of the result, if it is needed.

- if (!N.getValue(0).use_empty()) {

+ if (!SDValue(Node, 0).use_empty()) {

SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,

LoReg, NVT, InFlag);

InFlag = Result.getValue(2);

- ReplaceUses(N.getValue(0), Result);

+ ReplaceUses(SDValue(Node, 0), Result);

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent-2, ' ') << "=> ";

+ dbgs() << std::string(Indent-2, ' ') << "=> ";

Result.getNode()->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

#endif

}

// Copy the high half of the result, if it is needed.

- if (!N.getValue(1).use_empty()) {

+ if (!SDValue(Node, 1).use_empty()) {

SDValue Result;

if (HiReg == X86::AH && Subtarget->is64Bit()) {

// Prevent use of AH in a REX instruction by referencing AX instead.

@@ -1826,12 +1830,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

HiReg, NVT, InFlag);

InFlag = Result.getValue(2);

}

- ReplaceUses(N.getValue(1), Result);

+ ReplaceUses(SDValue(Node, 1), Result);

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent-2, ' ') << "=> ";

+ dbgs() << std::string(Indent-2, ' ') << "=> ";

Result.getNode()->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

#endif

}

@@ -1869,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

unsigned LoReg, HiReg, ClrReg;

unsigned ClrOpcode, SExtOpcode;

- EVT ClrVT = NVT;

switch (NVT.getSimpleVT().SimpleTy) {

default: llvm_unreachable("Unsupported VT!");

case MVT::i8:

@@ -1879,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

break;

case MVT::i16:

LoReg = X86::AX; HiReg = X86::DX;

- ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32;

+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;

SExtOpcode = X86::CWD;

break;

case MVT::i32:

@@ -1889,13 +1892,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

break;

case MVT::i64:

LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;

- ClrOpcode = ~0U; // NOT USED.

+ ClrOpcode = X86::MOV64r0;

SExtOpcode = X86::CQO;

break;

}

SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;

- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);

+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);

bool signBitIsZero = CurDAG->SignBitIsZero(N0);

SDValue InFlag;

@@ -1903,7 +1906,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

// Special case for div8, just use a move with zero extension to AX to

// clear the upper 8 bits (AH).

SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;

- if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {

+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {

SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };

Move =

SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,

@@ -1928,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);

} else {

// Zero out the high part, effectively zero extending the input.

- SDValue ClrNode;

- if (NVT.getSimpleVT() == MVT::i64) {

- ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),

- 0);

- // We just did a 32-bit clear, insert it into a 64-bit register to

- // clear the whole 64-bit reg.

- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);

- SDValue SubRegNo =

- CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);

- ClrNode =

- SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,

- MVT::i64, Zero, ClrNode, SubRegNo),

- 0);

- } else {

- ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);

- }

+ SDValue ClrNode =

+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);

InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,

ClrNode, InFlag).getValue(1);

}

@@ -1966,21 +1953,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

}

// Copy the division (low) result, if it is needed.

- if (!N.getValue(0).use_empty()) {

+ if (!SDValue(Node, 0).use_empty()) {

SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,

LoReg, NVT, InFlag);

InFlag = Result.getValue(2);

- ReplaceUses(N.getValue(0), Result);

+ ReplaceUses(SDValue(Node, 0), Result);

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent-2, ' ') << "=> ";

+ dbgs() << std::string(Indent-2, ' ') << "=> ";

Result.getNode()->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

#endif

}

// Copy the remainder (high) result, if it is needed.

- if (!N.getValue(1).use_empty()) {

+ if (!SDValue(Node, 1).use_empty()) {

SDValue Result;

if (HiReg == X86::AH && Subtarget->is64Bit()) {

// Prevent use of AH in a REX instruction by referencing AX instead.

@@ -2000,12 +1987,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

HiReg, NVT, InFlag);

InFlag = Result.getValue(2);

}

- ReplaceUses(N.getValue(1), Result);

+ ReplaceUses(SDValue(Node, 1), Result);

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent-2, ' ') << "=> ";

+ dbgs() << std::string(Indent-2, ' ') << "=> ";

Result.getNode()->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

#endif

}

@@ -2124,16 +2111,16 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {

}

- SDNode *ResNode = SelectCode(N);

+ SDNode *ResNode = SelectCode(Node);

#ifndef NDEBUG

DEBUG({

- errs() << std::string(Indent-2, ' ') << "=> ";

- if (ResNode == NULL || ResNode == N.getNode())

- N.getNode()->dump(CurDAG);

+ dbgs() << std::string(Indent-2, ' ') << "=> ";

+ if (ResNode == NULL || ResNode == Node)

+ Node->dump(CurDAG);

else

ResNode->dump(CurDAG);

- errs() << '\n';

+ dbgs() << '\n';

});

Indent -= 2;

#endif

@@ -2150,7 +2137,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,

case 'v': // not offsetable ??

default: return true;

case 'm': // memory

- if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))

+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))

return true;

break;

}

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c722fbf648b4..228ec9f2d63d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp

@@ -978,6 +978,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)

setTargetDAGCombine(ISD::SHL);

setTargetDAGCombine(ISD::SRA);

setTargetDAGCombine(ISD::SRL);

+ setTargetDAGCombine(ISD::OR);

setTargetDAGCombine(ISD::STORE);

setTargetDAGCombine(ISD::MEMBARRIER);

setTargetDAGCombine(ISD::ZERO_EXTEND);

@@ -2077,10 +2078,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,

assert(((Callee.getOpcode() == ISD::Register &&

(cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||

- cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) ||

+ cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||

Callee.getOpcode() == ISD::TargetExternalSymbol ||

Callee.getOpcode() == ISD::TargetGlobalAddress) &&

- "Expecting an global address, external symbol, or register");

+ "Expecting a global address, external symbol, or scratch register");

return DAG.getNode(X86ISD::TC_RETURN, dl,

NodeTys, &Ops[0], Ops.size());

@@ -5610,13 +5611,21 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,

// because a TEST instruction will be better.

bool NonFlagUse = false;

for (SDNode::use_iterator UI = Op.getNode()->use_begin(),

- UE = Op.getNode()->use_end(); UI != UE; ++UI)

- if (UI->getOpcode() != ISD::BRCOND &&

- UI->getOpcode() != ISD::SELECT &&

- UI->getOpcode() != ISD::SETCC) {

+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {

+ SDNode *User = *UI;

+ unsigned UOpNo = UI.getOperandNo();

+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {

+ // Look past truncate.

+ UOpNo = User->use_begin().getOperandNo();

+ User = *User->use_begin();

+ }

+ if (User->getOpcode() != ISD::BRCOND &&

+ User->getOpcode() != ISD::SETCC &&

+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {

NonFlagUse = true;

break;

}

+ }

if (!NonFlagUse)

break;

}

@@ -5680,6 +5689,56 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,

return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);

}

+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node

+/// if it's possible.

+static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,

+ DebugLoc dl, SelectionDAG &DAG) {

+ SDValue LHS, RHS;

+ if (Op0.getOperand(1).getOpcode() == ISD::SHL) {

+ if (ConstantSDNode *Op010C =

+ dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))

+ if (Op010C->getZExtValue() == 1) {

+ LHS = Op0.getOperand(0);

+ RHS = Op0.getOperand(1).getOperand(1);

+ }

+ } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {

+ if (ConstantSDNode *Op000C =

+ dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))

+ if (Op000C->getZExtValue() == 1) {

+ LHS = Op0.getOperand(1);

+ RHS = Op0.getOperand(0).getOperand(1);

+ }

+ } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {

+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));

+ SDValue AndLHS = Op0.getOperand(0);

+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {

+ LHS = AndLHS.getOperand(0);

+ RHS = AndLHS.getOperand(1);

+ }

+ if (LHS.getNode()) {

+ // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT

+ // instruction. Since the shift amount is in-range-or-undefined, we know

+ // that doing a bittest on the i16 value is ok. We extend to i32 because

+ // the encoding for the i16 version is larger than the i32 version.

+ if (LHS.getValueType() == MVT::i8)

+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);

+ // If the operand types disagree, extend the shift amount to match. Since

+ // BT ignores high bits (like shifts) we can use anyextend.

+ if (LHS.getValueType() != RHS.getValueType())

+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);

+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);

+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;

+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,

+ DAG.getConstant(Cond, MVT::i8), BT);

+ }

+ return SDValue();

SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {

assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");

SDValue Op0 = Op.getOperand(0);

@@ -5687,6 +5746,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {

DebugLoc dl = Op.getDebugLoc();

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

+ // Optimize to BT if possible.

// Lower (X & (1 << N)) == 0 to BT(X, N).

// Lower ((X >>u N) & 1) != 0 to BT(X, N).

// Lower ((X >>s N) & 1) != 0 to BT(X, N).

@@ -5695,48 +5755,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {

Op1.getOpcode() == ISD::Constant &&

cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&

(CC == ISD::SETEQ || CC == ISD::SETNE)) {

- SDValue LHS, RHS;

- if (Op0.getOperand(1).getOpcode() == ISD::SHL) {

- if (ConstantSDNode *Op010C =

- dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))

- if (Op010C->getZExtValue() == 1) {

- LHS = Op0.getOperand(0);

- RHS = Op0.getOperand(1).getOperand(1);

- }

- } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {

- if (ConstantSDNode *Op000C =

- dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))

- if (Op000C->getZExtValue() == 1) {

- LHS = Op0.getOperand(1);

- RHS = Op0.getOperand(0).getOperand(1);

- }

- } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {

- ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));

- SDValue AndLHS = Op0.getOperand(0);

- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {

- LHS = AndLHS.getOperand(0);

- RHS = AndLHS.getOperand(1);

- }

- if (LHS.getNode()) {

- // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT

- // instruction. Since the shift amount is in-range-or-undefined, we know

- // that doing a bittest on the i16 value is ok. We extend to i32 because

- // the encoding for the i16 version is larger than the i32 version.

- if (LHS.getValueType() == MVT::i8)

- LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);

- // If the operand types disagree, extend the shift amount to match. Since

- // BT ignores high bits (like shifts) we can use anyextend.

- if (LHS.getValueType() != RHS.getValueType())

- RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);

- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);

- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;

- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,

- DAG.getConstant(Cond, MVT::i8), BT);

- }

+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);

+ if (NewSetCC.getNode())

+ return NewSetCC;

}

bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();

@@ -5936,6 +5957,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {

}

if (addTest) {

+ // Look past the truncate.

+ if (Cond.getOpcode() == ISD::TRUNCATE)

+ Cond = Cond.getOperand(0);

+ // We know the result of AND is compared against zero. Try to match

+ // it to BT.

+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {

+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);

+ if (NewSetCC.getNode()) {

+ CC = NewSetCC.getOperand(0);

+ Cond = NewSetCC.getOperand(1);

+ addTest = false;

+ }

+ if (addTest) {

CC = DAG.getConstant(X86::COND_NE, MVT::i8);

Cond = EmitTest(Cond, X86::COND_NE, DAG);

}

@@ -6093,6 +6131,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {

}

if (addTest) {

+ // Look past the truncate.

+ if (Cond.getOpcode() == ISD::TRUNCATE)

+ Cond = Cond.getOperand(0);

+ // We know the result of AND is compared against zero. Try to match

+ // it to BT.

+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {

+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);

+ if (NewSetCC.getNode()) {

+ CC = NewSetCC.getOperand(0);

+ Cond = NewSetCC.getOperand(1);

+ addTest = false;

+ }

+ if (addTest) {

CC = DAG.getConstant(X86::COND_NE, MVT::i8);

Cond = EmitTest(Cond, X86::COND_NE, DAG);

}

@@ -7524,8 +7579,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {

bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {

// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.

- return Ty1 == Type::getInt32Ty(Ty1->getContext()) &&

- Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit();

+ return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit();

}

bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {

@@ -7749,7 +7803,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,

for (int i=0; i < 2 + X86AddrNumOperands; ++i)

argOpers[i] = &bInstr->getOperand(i+2);

- // x86 address has 4 operands: base, index, scale, and displacement

+ // x86 address has 5 operands: base, index, scale, displacement, and segment.

int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]

unsigned t1 = F->getRegInfo().createVirtualRegister(RC);

@@ -7777,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,

BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())

.addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);

- unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);

- unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);

+ // The subsequent operations should be using the destination registers of

+ //the PHI instructions.

if (invSrc) {

- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1);

- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2);

+ t1 = F->getRegInfo().createVirtualRegister(RC);

+ t2 = F->getRegInfo().createVirtualRegister(RC);

+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());

+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());

} else {

- tt1 = t1;

- tt2 = t2;

+ t1 = dest1Oper.getReg();

+ t2 = dest2Oper.getReg();

}

int valArgIndx = lastAddrIndx + 1;

@@ -7798,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,

else

MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);

if (regOpcL != X86::MOV32rr)

- MIB.addReg(tt1);

+ MIB.addReg(t1);

(*MIB).addOperand(*argOpers[valArgIndx]);

assert(argOpers[valArgIndx + 1]->isReg() ==

argOpers[valArgIndx]->isReg());

@@ -7809,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,

else

MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);

if (regOpcH != X86::MOV32rr)

- MIB.addReg(tt2);

+ MIB.addReg(t2);

(*MIB).addOperand(*argOpers[valArgIndx + 1]);

MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);

@@ -9108,6 +9164,64 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,

return SDValue();

}

+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,

+ const X86Subtarget *Subtarget) {

+ EVT VT = N->getValueType(0);

+ if (VT != MVT::i64 || !Subtarget->is64Bit())

+ return SDValue();

+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)

+ SDValue N0 = N->getOperand(0);

+ SDValue N1 = N->getOperand(1);

+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)

+ std::swap(N0, N1);

+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)

+ return SDValue();

+ SDValue ShAmt0 = N0.getOperand(1);

+ if (ShAmt0.getValueType() != MVT::i8)

+ return SDValue();

+ SDValue ShAmt1 = N1.getOperand(1);

+ if (ShAmt1.getValueType() != MVT::i8)

+ return SDValue();

+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)

+ ShAmt0 = ShAmt0.getOperand(0);

+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)

+ ShAmt1 = ShAmt1.getOperand(0);

+ DebugLoc DL = N->getDebugLoc();

+ unsigned Opc = X86ISD::SHLD;

+ SDValue Op0 = N0.getOperand(0);

+ SDValue Op1 = N1.getOperand(0);

+ if (ShAmt0.getOpcode() == ISD::SUB) {

+ Opc = X86ISD::SHRD;

+ std::swap(Op0, Op1);

+ std::swap(ShAmt0, ShAmt1);

+ }

+ if (ShAmt1.getOpcode() == ISD::SUB) {

+ SDValue Sum = ShAmt1.getOperand(0);

+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {

+ if (SumC->getSExtValue() == 64 &&

+ ShAmt1.getOperand(1) == ShAmt0)

+ return DAG.getNode(Opc, DL, VT,

+ Op0, Op1,

+ DAG.getNode(ISD::TRUNCATE, DL,

+ MVT::i8, ShAmt0));

+ }

+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {

+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);

+ if (ShAmt0C &&

+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)

+ return DAG.getNode(Opc, DL, VT,

+ N0.getOperand(0), N1.getOperand(0),

+ DAG.getNode(ISD::TRUNCATE, DL,

+ MVT::i8, ShAmt0));

+ }

+ return SDValue();

/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.

static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,

const X86Subtarget *Subtarget) {

@@ -9370,6 +9484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,

case ISD::SHL:

case ISD::SRA:

case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);

+ case ISD::OR: return PerformOrCombine(N, DAG, Subtarget);

case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);

case X86ISD::FXOR:

case X86ISD::FOR: return PerformFORCombine(N, DAG);

@@ -9423,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {

std::string AsmStr = IA->getAsmString();

// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"

- std::vector<std::string> AsmPieces;

+ SmallVector<StringRef, 4> AsmPieces;

SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?

switch (AsmPieces.size()) {

@@ -9445,7 +9560,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {

return LowerToBSwap(CI);

}

// rorw $$8, ${0:w} --> llvm.bswap.i16

- if (CI->getType() == Type::getInt16Ty(CI->getContext()) &&

+ if (CI->getType()->isInteger(16) &&

AsmPieces.size() == 3 &&

AsmPieces[0] == "rorw" &&

AsmPieces[1] == "$$8," &&

@@ -9455,12 +9570,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {

}

break;

case 3:

- if (CI->getType() == Type::getInt64Ty(CI->getContext()) &&

+ if (CI->getType()->isInteger(64) &&

Constraints.size() >= 2 &&

Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&

Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {

// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64

- std::vector<std::string> Words;

+ SmallVector<StringRef, 4> Words;

SplitString(AsmPieces[0], Words, " \t");

if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {

Words.clear();

diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 65fbbdae9a7f..08e1dd1e060d 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td

@@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),

def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),

(ins GR64:$src1, i64i8imm:$src2),

"or{q}\t{$src2, $dst|$dst, $src2}",

- [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),

- (implicit EFLAGS)]>;

+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),

+ (implicit EFLAGS)]>;

def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),

(ins GR64:$src1, i64i32imm:$src2),

"or{q}\t{$src2, $dst|$dst, $src2}",

- [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),

- (implicit EFLAGS)]>;

+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),

+ (implicit EFLAGS)]>;

} // isTwoAddress

def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),

@@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),

// Alias Instructions

//===----------------------------------------------------------------------===//

-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's

-// equivalent due to implicit zero-extending, and it sometimes has a smaller

-// encoding.

+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a

+// smaller encoding, but doing so at isel time interferes with rematerialization

+// in the current register allocator. For now, this is rewritten when the

+// instruction is lowered to an MCInst.

// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove

// when we have a better way to specify isel priority.

-let AddedComplexity = 1 in

-def : Pat<(i64 0),

- (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;

-// Materialize i64 constant where top 32-bits are zero.

+let Defs = [EFLAGS],

+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in

+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),

+ "",

+ [(set GR64:$dst, 0)]>;

+// Materialize i64 constant where top 32-bits are zero. This could theoretically

+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however

+// that would make it more difficult to rematerialize.

let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in

def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),

"", [(set GR64:$dst, i64immZExt32:$src)]>;

@@ -1683,6 +1687,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),

def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),

"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;

+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in

def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),

"cmpxchg16b\t$dst", []>, TB;

@@ -1962,6 +1967,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),

def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),

(SUB64mi32 addr:$dst, 0xffffffff80000000)>;

+// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it

+// has an immediate with at least 32 bits of leading zeros, to avoid needing to

+// materialize that immediate in a register first.

+def : Pat<(and GR64:$src, i64immZExt32:$imm),

+ (SUBREG_TO_REG

+ (i64 0),

+ (AND32ri

+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),

+ imm:$imm),

+ x86_subreg_32bit)>;

// r & (2^32-1) ==> movz

def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),

(MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;

@@ -2028,7 +2044,7 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

x86_subreg_8bit_hi))>,

Requires<[In64BitMode]>;

-def : Pat<(srl_su GR16:$src, (i8 8)),

+def : Pat<(srl GR16:$src, (i8 8)),

(EXTRACT_SUBREG

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

@@ -2098,24 +2114,7 @@ def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),

def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),

(SAR64mCL addr:$dst)>;

-// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)

-def : Pat<(or (srl GR64:$src1, CL:$amt),

- (shl GR64:$src2, (sub 64, CL:$amt))),

- (SHRD64rrCL GR64:$src1, GR64:$src2)>;

-def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),

- (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),

- (SHRD64mrCL addr:$dst, GR64:$src2)>;

-def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),

- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),

- (SHRD64rrCL GR64:$src1, GR64:$src2)>;

-def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),

- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),

- addr:$dst),

- (SHRD64mrCL addr:$dst, GR64:$src2)>;

+// Double shift patterns

def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),

(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;

@@ -2123,24 +2122,6 @@ def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),

GR64:$src2, (i8 imm:$amt2)), addr:$dst),

(SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;

-// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)

-def : Pat<(or (shl GR64:$src1, CL:$amt),

- (srl GR64:$src2, (sub 64, CL:$amt))),

- (SHLD64rrCL GR64:$src1, GR64:$src2)>;

-def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),

- (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),

- (SHLD64mrCL addr:$dst, GR64:$src2)>;

-def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),

- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),

- (SHLD64rrCL GR64:$src1, GR64:$src2)>;

-def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),

- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),

- addr:$dst),

- (SHLD64mrCL addr:$dst, GR64:$src2)>;

def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),

(SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;

@@ -2148,6 +2129,19 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),

GR64:$src2, (i8 imm:$amt2)), addr:$dst),

(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;

+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.

+let AddedComplexity = 5 in { // Try this before the selecting to OR

+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),

+ (implicit EFLAGS)),

+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;

+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),

+ (implicit EFLAGS)),

+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;

+def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2),

+ (implicit EFLAGS)),

+ (ADD64rr GR64:$src1, GR64:$src2)>;

+} // AddedComplexity

// X86 specific add which produces a flag.

def : Pat<(addc GR64:$src1, GR64:$src2),

(ADD64rr GR64:$src1, GR64:$src2)>;

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e555cd176cdf..7b39fb311cba 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp

@@ -28,6 +28,7 @@

#include "llvm/CodeGen/LiveVariables.h"

#include "llvm/CodeGen/PseudoSourceValue.h"

#include "llvm/Support/CommandLine.h"

+#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Target/TargetOptions.h"

@@ -711,6 +712,62 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,

}

+bool

+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,

+ unsigned &SrcReg, unsigned &DstReg,

+ unsigned &SubIdx) const {

+ switch (MI.getOpcode()) {

+ default: break;

+ case X86::MOVSX16rr8:

+ case X86::MOVZX16rr8:

+ case X86::MOVSX32rr8:

+ case X86::MOVZX32rr8:

+ case X86::MOVSX64rr8:

+ case X86::MOVZX64rr8:

+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())

+ // It's not always legal to reference the low 8-bit of the larger

+ // register in 32-bit mode.

+ return false;

+ case X86::MOVSX32rr16:

+ case X86::MOVZX32rr16:

+ case X86::MOVSX64rr16:

+ case X86::MOVZX64rr16:

+ case X86::MOVSX64rr32:

+ case X86::MOVZX64rr32: {

+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())

+ // Be conservative.

+ return false;

+ SrcReg = MI.getOperand(1).getReg();

+ DstReg = MI.getOperand(0).getReg();

+ switch (MI.getOpcode()) {

+ default:

+ llvm_unreachable(0);

+ break;

+ case X86::MOVSX16rr8:

+ case X86::MOVZX16rr8:

+ case X86::MOVSX32rr8:

+ case X86::MOVZX32rr8:

+ case X86::MOVSX64rr8:

+ case X86::MOVZX64rr8:

+ SubIdx = 1;

+ break;

+ case X86::MOVSX32rr16:

+ case X86::MOVZX32rr16:

+ case X86::MOVSX64rr16:

+ case X86::MOVZX64rr16:

+ SubIdx = 3;

+ break;

+ case X86::MOVSX64rr32:

+ case X86::MOVZX64rr32:

+ SubIdx = 4;

+ break;

+ }

+ return true;

+ }

+ return false;

/// isFrameOperand - Return true and the FrameIndex if the specified

/// operand and follow operands form a reference to the stack frame.

bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,

@@ -1018,12 +1075,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,

switch (Opc) {

default: break;

case X86::MOV8r0:

- case X86::MOV32r0: {

+ case X86::MOV16r0:

+ case X86::MOV32r0:

+ case X86::MOV64r0: {

if (!isSafeToClobberEFLAGS(MBB, I)) {

switch (Opc) {

default: break;

case X86::MOV8r0: Opc = X86::MOV8ri; break;

+ case X86::MOV16r0: Opc = X86::MOV16ri; break;

case X86::MOV32r0: Opc = X86::MOV32ri; break;

+ case X86::MOV64r0: Opc = X86::MOV64ri; break;

}

Clone = false;

}

@@ -2290,8 +2351,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,

OpcodeTablePtr = &RegOp2MemOpTable2Addr;

isTwoAddrFold = true;

} else if (i == 0) { // If operand 0

- if (MI->getOpcode() == X86::MOV32r0)

+ if (MI->getOpcode() == X86::MOV64r0)

+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);

+ else if (MI->getOpcode() == X86::MOV32r0)

NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);

+ else if (MI->getOpcode() == X86::MOV16r0)

+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);

else if (MI->getOpcode() == X86::MOV8r0)

NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);

if (NewMI)

@@ -2354,7 +2419,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,

// No fusion

if (PrintFailedFusing)

- errs() << "We failed to fuse operand " << i << " in " << *MI;

+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;

return NULL;

}

@@ -2559,7 +2624,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,

} else if (OpNum == 0) { // If operand 0

switch (Opc) {

case X86::MOV8r0:

+ case X86::MOV16r0:

case X86::MOV32r0:

+ case X86::MOV64r0:

return true;

default: break;

}

diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index b83441d89eff..0ab85f4f45b2 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h

@@ -448,6 +448,16 @@ public:

unsigned &SrcReg, unsigned &DstReg,

unsigned &SrcSubIdx, unsigned &DstSubIdx) const;

+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"

+ /// extension instruction. That is, it's like a copy where it's legal for the

+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns

+ /// true, then it's expected the pre-extension value is available as a subreg

+ /// of the result register. This also returns the sub-register index in

+ /// SubIdx.

+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,

+ unsigned &SrcReg, unsigned &DstReg,

+ unsigned &SubIdx) const;

unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;

/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination

/// stack locations as well. This uses a heuristic so it isn't

diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4d922a54ec2c..396cb53502ef 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td

@@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,

def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,

[SDNPHasChain, SDNPOptInFlag]>;

-def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>;

+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,

+ [SDNPCommutative]>;

def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;

-def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;

-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;

+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,

+ [SDNPCommutative]>;

+def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,

+ [SDNPCommutative]>;

def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;

def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;

-def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>;

-def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>;

-def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>;

+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,

+ [SDNPCommutative]>;

+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,

+ [SDNPCommutative]>;

+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,

+ [SDNPCommutative]>;

def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;

@@ -487,6 +493,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{

return N->hasOneUse();

}]>;

+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.

+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{

+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))

+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());

+ else {

+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();

+ APInt Mask = APInt::getAllOnesValue(BitWidth);

+ APInt KnownZero0, KnownOne0;

+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);

+ APInt KnownZero1, KnownOne1;

+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);

+ return (~KnownZero0 & ~KnownZero1) == 0;

+ }

+}]>;

// 'shld' and 'shrd' instruction patterns. Note that even though these have

// the srl and shl in their patterns, the C++ code must still check for them,

// because predicates are tested before children nodes are explored.

@@ -3700,18 +3721,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,

def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),

"xor{b}\t$dst, $dst",

[(set GR8:$dst, 0)]>;

+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller

+// encoding and avoids a partial-register update sometimes, but doing so

+// at isel time interferes with rematerialization in the current register

+// allocator. For now, this is rewritten when the instruction is lowered

+// to an MCInst.

+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),

+ "",

+ [(set GR16:$dst, 0)]>, OpSize;

def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),

"xor{l}\t$dst, $dst",

[(set GR32:$dst, 0)]>;

}

-// Use xorl instead of xorw since we don't care about the high 16 bits,

-// it's smaller, and it avoids a partial-register update.

-let AddedComplexity = 1 in

-def : Pat<(i16 0),

- (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;

//===----------------------------------------------------------------------===//

// Thread Local Storage Instructions

@@ -3792,7 +3816,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),

[(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;

}

let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {

-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),

+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),

"lock\n\t"

"cmpxchg8b\t$ptr",

[(X86cas8 addr:$ptr)]>, TB, LOCK;

@@ -3858,6 +3882,7 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),

def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),

"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;

+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in

def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),

"cmpxchg8b\t$dst", []>, TB;

@@ -4466,7 +4491,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

x86_subreg_8bit_hi)>,

Requires<[In32BitMode]>;

-def : Pat<(srl_su GR16:$src, (i8 8)),

+def : Pat<(srl GR16:$src, (i8 8)),

(EXTRACT_SUBREG

(MOVZX32rr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

@@ -4640,6 +4665,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

(SETB_C32r)>;

+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.

+let AddedComplexity = 5 in { // Try this before selecting to OR

+def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),

+ (implicit EFLAGS)),

+ (ADD16ri GR16:$src1, imm:$src2)>;

+def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),

+ (implicit EFLAGS)),

+ (ADD32ri GR32:$src1, imm:$src2)>;

+def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),

+ (implicit EFLAGS)),

+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;

+def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),

+ (implicit EFLAGS)),

+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;

+def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),

+ (implicit EFLAGS)),

+ (ADD16rr GR16:$src1, GR16:$src2)>;

+def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),

+ (implicit EFLAGS)),

+ (ADD32rr GR32:$src1, GR32:$src2)>;

+} // AddedComplexity

//===----------------------------------------------------------------------===//

// EFLAGS-defining Patterns

//===----------------------------------------------------------------------===//

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b26e50869205..94b9b5543066 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td

@@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),

// Like 'load', but uses special alignment checks suitable for use in

// memory operands in most SSE instructions, which are required to

-// be naturally aligned on some targets but not on others.

-// FIXME: Actually implement support for targets that don't require the

-// alignment. This probably wants a subtarget predicate.

+// be naturally aligned on some targets but not on others. If the subtarget

+// allows unaligned accesses, match any load, though this may require

+// setting a feature bit in the processor (on startup, for example).

+// Opteron 10h and later implement such a feature.

def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{

- return cast<LoadSDNode>(N)->getAlignment() >= 16;

+ return Subtarget->hasVectorUAMem()

+ || cast<LoadSDNode>(N)->getAlignment() >= 16;

}]>;

def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;

diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index c69cc83df6bb..f363903d9316 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp

@@ -348,7 +348,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {

#endif

#if 0

- DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr

+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr

<< " ESP=" << (void*)StackPtr

<< ": Resolving call to function: "

<< TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");

diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index d96aafda603a..9bd96af6c750 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp

@@ -591,6 +591,15 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

int FrameIndex = MI.getOperand(i).getIndex();

unsigned BasePtr;

+ // DEBUG_VALUE has a special representation, and is only robust enough to

+ // represent SP(or BP) +- offset addressing modes. We rewrite the

+ // FrameIndex to be a constant; implicitly positive constants are relative

+ // to ESP and negative ones to EBP.

+ if (MI.getOpcode()==TargetInstrInfo::DEBUG_VALUE) {

+ MI.getOperand(i).ChangeToImmediate(getFrameIndexOffset(MF, FrameIndex));

+ return 0;

+ }

if (needsStackRealignment(MF))

BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);

else

diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 75cdbada1b5a..2039be7c9b3d 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp

@@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,

, HasFMA3(false)

, HasFMA4(false)

, IsBTMemSlow(false)

+ , HasVectorUAMem(false)

, DarwinVers(0)

, stackAlignment(8)

// FIXME: this is a known good value for Yonah. How about others?

@@ -317,7 +318,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,

if (Is64Bit)

HasX86_64 = true;

- DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel

+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel

<< ", 3DNowLevel " << X863DNowLevel

<< ", 64bit " << HasX86_64 << "\n");

assert((!Is64Bit || HasX86_64) &&

diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index ef6dbafac346..618dd102f32e 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h

@@ -78,6 +78,10 @@ protected:

/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.

bool IsBTMemSlow;

+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands.

+ /// This may require setting a feature bit in the processor.

+ bool HasVectorUAMem;

/// DarwinVers - Nonzero if this is a darwin platform: the numeric

/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.

unsigned char DarwinVers; // Is any darwin-x86 platform.

@@ -142,6 +146,7 @@ public:

bool hasFMA3() const { return HasFMA3; }

bool hasFMA4() const { return HasFMA4; }

bool isBTMemSlow() const { return IsBTMemSlow; }

+ bool hasVectorUAMem() const { return HasVectorUAMem; }

bool isTargetDarwin() const { return TargetType == isDarwin; }

bool isTargetELF() const { return TargetType == isELF; }

@@ -169,7 +174,7 @@ public:

p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";

else if (isTargetDarwin())

p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";

- else if (isTargetCygMing() || isTargetWindows())

+ else if (isTargetMingw() || isTargetWindows())

p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32";

else

p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";