author     Dimitry Andric <dim@FreeBSD.org>  2015-06-21 13:59:01 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2015-06-21 13:59:01 +0000
commit     3a0822f094b578157263e04114075ad7df81db41 (patch)
tree       bc48361fe2cd1ca5f93ac01b38b183774468fc79 /lib/Target/X86
parent     85d8b2bbe386bcfe669575d05b61482d7be07e5d (diff)
Vendor import of llvm trunk r240225 (tag: vendor/llvm/llvm-trunk-r240225)
Notes:
    svn path=/vendor/llvm/dist/; revision=284677
    svn path=/vendor/llvm/llvm-trunk-r240225/; revision=284678; tag=vendor/llvm/llvm-trunk-r240225
Diffstat (limited to 'lib/Target/X86')
 lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp     |   2
 lib/Target/X86/AsmParser/X86AsmInstrumentation.h       |   2
 lib/Target/X86/AsmParser/X86AsmParser.cpp              |  51
 lib/Target/X86/Disassembler/X86Disassembler.cpp        |   2
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h         |   2
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h       |   2
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp          |  10
 lib/Target/X86/MCTargetDesc/X86BaseInfo.h              |   8
 lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp     |   2
 lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp   |   3
 lib/Target/X86/MCTargetDesc/X86FixupKinds.h            |   4
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp        |  18
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h          |  22
 lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp    |   4
 lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp |   2
 lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp     |   2
 lib/Target/X86/Utils/X86ShuffleDecode.cpp              |   2
 lib/Target/X86/Utils/X86ShuffleDecode.h                |   2
 lib/Target/X86/X86.h                                   |   2
 lib/Target/X86/X86AsmPrinter.cpp                       |  11
 lib/Target/X86/X86AsmPrinter.h                         |   6
 lib/Target/X86/X86CallFrameOptimization.cpp            |   2
 lib/Target/X86/X86CallingConv.h                        |   2
 lib/Target/X86/X86ExpandPseudo.cpp                     |  14
 lib/Target/X86/X86FixupLEAs.cpp                        |   4
 lib/Target/X86/X86FloatingPoint.cpp                    |   4
 lib/Target/X86/X86FrameLowering.cpp                    | 495
 lib/Target/X86/X86FrameLowering.h                      |  64
 lib/Target/X86/X86ISelDAGToDAG.cpp                     |   4
 lib/Target/X86/X86ISelLowering.cpp                     | 349
 lib/Target/X86/X86ISelLowering.h                       |  16
 lib/Target/X86/X86InstrAVX512.td                       | 318
 lib/Target/X86/X86InstrBuilder.h                       |   2
 lib/Target/X86/X86InstrFragmentsSIMD.td                |  10
 lib/Target/X86/X86InstrInfo.cpp                        | 338
 lib/Target/X86/X86InstrInfo.h                          |  62
 lib/Target/X86/X86InstrSSE.td                          |  15
 lib/Target/X86/X86IntrinsicsInfo.h                     |  61
 lib/Target/X86/X86MCInstLower.cpp                      | 102
 lib/Target/X86/X86MachineFunctionInfo.h                |   2
 lib/Target/X86/X86PadShortFunction.cpp                 |   2
 lib/Target/X86/X86RegisterInfo.cpp                     |  18
 lib/Target/X86/X86RegisterInfo.h                       |   4
 lib/Target/X86/X86SelectionDAGInfo.h                   |   2
 lib/Target/X86/X86Subtarget.cpp                        |   5
 lib/Target/X86/X86Subtarget.h                          |   7
 lib/Target/X86/X86TargetMachine.cpp                    |  21
 lib/Target/X86/X86TargetMachine.h                      |   6
 lib/Target/X86/X86TargetTransformInfo.cpp              |  20
 lib/Target/X86/X86VZeroUpper.cpp                       |   2
 lib/Target/X86/X86WinEHState.cpp                       |  99
 51 files changed, 1477 insertions(+), 732 deletions(-)
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 9eee4a0f3d82..6ba897b8636d 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1080,4 +1080,4 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
return new X86AsmInstrumentation(STI);
}
-} // End llvm namespace
+} // namespace llvm
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 19ebcc44f61e..341fc81c0480 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -61,6 +61,6 @@ protected:
unsigned InitialFrameReg;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e8965710f022..418f0431e1d8 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -42,15 +42,16 @@ namespace {
static const char OpPrecedence[] = {
0, // IC_OR
- 1, // IC_AND
- 2, // IC_LSHIFT
- 2, // IC_RSHIFT
- 3, // IC_PLUS
- 3, // IC_MINUS
- 4, // IC_MULTIPLY
- 4, // IC_DIVIDE
- 5, // IC_RPAREN
- 6, // IC_LPAREN
+ 1, // IC_XOR
+ 2, // IC_AND
+ 3, // IC_LSHIFT
+ 3, // IC_RSHIFT
+ 4, // IC_PLUS
+ 4, // IC_MINUS
+ 5, // IC_MULTIPLY
+ 5, // IC_DIVIDE
+ 6, // IC_RPAREN
+ 7, // IC_LPAREN
0, // IC_IMM
0 // IC_REGISTER
};
@@ -70,6 +71,7 @@ private:
enum InfixCalculatorTok {
IC_OR = 0,
+ IC_XOR,
IC_AND,
IC_LSHIFT,
IC_RSHIFT,
@@ -204,6 +206,12 @@ private:
Val = Op1.second | Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
+ case IC_XOR:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Xor operation with an immediate and a register!");
+ Val = Op1.second ^ Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
case IC_AND:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"And operation with an immediate and a register!");
@@ -232,6 +240,7 @@ private:
enum IntelExprState {
IES_OR,
+ IES_XOR,
IES_AND,
IES_LSHIFT,
IES_RSHIFT,
@@ -297,6 +306,21 @@ private:
}
PrevState = CurrState;
}
+ void onXor() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_XOR;
+ IC.pushOperator(IC_XOR);
+ break;
+ }
+ PrevState = CurrState;
+ }
void onAnd() {
IntelExprState CurrState = State;
switch (State) {
@@ -473,6 +497,7 @@ private:
case IES_MINUS:
case IES_NOT:
case IES_OR:
+ case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
@@ -496,7 +521,7 @@ private:
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT) &&
+ PrevState == IES_NOT || PrevState == IES_XOR) &&
CurrState == IES_MINUS) {
// Unary minus. No need to pop the minus operand because it was never
// pushed.
@@ -506,7 +531,7 @@ private:
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT) &&
+ PrevState == IES_NOT || PrevState == IES_XOR) &&
CurrState == IES_NOT) {
// Unary not. No need to pop the not operand because it was never
// pushed.
@@ -593,6 +618,7 @@ private:
case IES_MINUS:
case IES_NOT:
case IES_OR:
+ case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
@@ -605,7 +631,7 @@ private:
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT) &&
+ PrevState == IES_NOT || PrevState == IES_XOR) &&
(CurrState == IES_MINUS || CurrState == IES_NOT)) {
State = IES_ERROR;
break;
@@ -1217,6 +1243,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::Star: SM.onStar(); break;
case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::Pipe: SM.onOr(); break;
+ case AsmToken::Caret: SM.onXor(); break;
case AsmToken::Amp: SM.onAnd(); break;
case AsmToken::LessLess:
SM.onLShift(); break;
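
The X86AsmParser hunks above give Intel-syntax immediate expressions a '^' (XOR) operator with C-like binding: every pre-existing entry in OpPrecedence[] moves up one slot so that '|' binds loosest, then '^', then '&'. A minimal self-contained sketch of an operator-stack evaluator with the same ordering; this is an illustration only, not LLVM's InfixCalculator (shifts and parentheses are omitted for brevity):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <stack>
#include <string>

// Precedence mirroring the patched OpPrecedence[] ordering for the
// single-character operators: '|' < '^' < '&' < '+'/'-' < '*'/'/'.
static int prec(char Op) {
  switch (Op) {
  case '|': return 0;
  case '^': return 1; // newly supported; binds tighter than '|'
  case '&': return 2;
  case '+':
  case '-': return 4;
  case '*':
  case '/': return 5;
  default:  return -1;
  }
}

static int64_t apply(int64_t L, int64_t R, char Op) {
  switch (Op) {
  case '|': return L | R;
  case '^': return L ^ R; // what the new IC_XOR case computes
  case '&': return L & R;
  case '+': return L + R;
  case '-': return L - R;
  case '*': return L * R;
  default:  return R ? L / R : 0;
  }
}

// Shunting-yard evaluation of a single-digit infix expression.
static int64_t eval(const std::string &E) {
  std::stack<int64_t> Vals;
  std::stack<char> Ops;
  auto Reduce = [&Vals, &Ops] {
    int64_t R = Vals.top(); Vals.pop();
    int64_t L = Vals.top(); Vals.pop();
    char Op = Ops.top(); Ops.pop();
    Vals.push(apply(L, R, Op));
  };
  for (char C : E) {
    if (C >= '0' && C <= '9') {
      Vals.push(C - '0');
      continue;
    }
    while (!Ops.empty() && prec(Ops.top()) >= prec(C))
      Reduce();
    Ops.push(C);
  }
  while (!Ops.empty())
    Reduce();
  return Vals.top();
}

int main() {
  // '^' groups as in C: 1|2^3&2 == 1 | (2 ^ (3 & 2)) == 1.
  assert(eval("1|2^3&2") == 1);
  puts("ok");
}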
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 6e99c37c2bc7..5b53fbef3f71 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -69,7 +69,7 @@ namespace X86 {
extern Target TheX86_32Target, TheX86_64Target;
-}
+} // namespace llvm
static bool translateInstruction(MCInst &target,
InternalInstruction &source,
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 62b6b73e7864..ac484f317276 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -140,6 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-}
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6e371da37290..2bee518fed68 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -159,6 +159,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 1ac656d4614b..2d85f84d6669 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -426,7 +426,7 @@ namespace CU {
UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};
-} // end CU namespace
+} // namespace CU
class DarwinX86AsmBackend : public X86AsmBackend {
const MCRegisterInfo &MRI;
@@ -790,10 +790,8 @@ public:
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
+ const Triple &TheTriple,
StringRef CPU) {
- Triple TheTriple(TT);
-
if (TheTriple.isOSBinFormatMachO())
return new DarwinX86_32AsmBackend(T, MRI, CPU);
@@ -806,10 +804,8 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
+ const Triple &TheTriple,
StringRef CPU) {
- Triple TheTriple(TT);
-
if (TheTriple.isOSBinFormatMachO()) {
MachO::CPUSubTypeX86 CS =
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 85b00068252d..69e9c7b4a83e 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,7 +41,7 @@ namespace X86 {
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-} // end namespace X86;
+} // namespace X86
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -271,7 +271,7 @@ namespace X86II {
/// register DI/EDI/ESI.
RawFrmDst = 9,
- /// RawFrmSrc - This form is for instructions that use the the source index
+ /// RawFrmSrc - This form is for instructions that use the source index
/// register SI/ESI/ERI with a possible segment override, and also the
/// destination index register DI/ESI/RDI.
RawFrmDstSrc = 10,
@@ -762,8 +762,8 @@ namespace X86II {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-}
+} // namespace X86II
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index a33468dc4769..512afebf482e 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -28,7 +28,7 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
-}
+} // namespace
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 2943dd383efa..7c09e5d59580 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -32,7 +32,8 @@ public:
StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
uint64_t SymSize = SymI->getSize();
- int64_t Addend; getELFRelocationAddend(Rel, Addend);
+ auto *Obj = cast<ELFObjectFileBase>(Rel.getObjectFile());
+ int64_t Addend = *Obj->getRelocationAddend(Rel.getRawDataRefImpl());
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 4899900dcef9..a523a32b2a2d 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-}
-}
+} // namespace X86
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index cc98e55dc695..431010d4cbc2 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -42,12 +42,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_MC_DESC
#include "X86GenSubtargetInfo.inc"
-std::string X86_MC::ParseX86Triple(StringRef TT) {
- Triple TheTriple(TT);
+std::string X86_MC::ParseX86Triple(const Triple &TT) {
std::string FS;
- if (TheTriple.getArch() == Triple::x86_64)
+ if (TT.getArch() == Triple::x86_64)
FS = "+64bit-mode,-32bit-mode,-16bit-mode";
- else if (TheTriple.getEnvironment() != Triple::CODE16)
+ else if (TT.getEnvironment() != Triple::CODE16)
FS = "-64bit-mode,+32bit-mode,-16bit-mode";
else
FS = "-64bit-mode,-32bit-mode,+16bit-mode";
@@ -55,7 +54,7 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
return FS;
}
-unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
+unsigned X86_MC::getDwarfRegFlavour(const Triple &TT, bool isEH) {
if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
@@ -75,8 +74,8 @@ void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
}
}
-MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
std::string ArchFS = X86_MC::ParseX86Triple(TT);
if (!FS.empty()) {
if (!ArchFS.empty())
@@ -219,15 +218,14 @@ static MCInstPrinter *createX86MCInstPrinter(const Triple &T,
return nullptr;
}
-static MCRelocationInfo *createX86MCRelocationInfo(StringRef TT,
+static MCRelocationInfo *createX86MCRelocationInfo(const Triple &TheTriple,
MCContext &Ctx) {
- Triple TheTriple(TT);
if (TheTriple.isOSBinFormatMachO() && TheTriple.getArch() == Triple::x86_64)
return createX86_64MachORelocationInfo(Ctx);
else if (TheTriple.isOSBinFormatELF())
return createX86_64ELFRelocationInfo(Ctx);
// Default to the stock relocation info.
- return llvm::createMCRelocationInfo(TT, Ctx);
+ return llvm::createMCRelocationInfo(TheTriple, Ctx);
}
static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
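
The StringRef-to-Triple migration in this file, and in the createX86_32AsmBackend/createX86_64AsmBackend and createX86MCRelocationInfo hunks, replaces per-call string parsing with a triple parsed once by the caller. A small stand-in sketch of that pattern, using a hypothetical MiniTriple struct rather than LLVM's Triple class, and made-up flavour values:

#include <string>

// Hypothetical stand-in for llvm::Triple: the arch field is parsed once.
struct MiniTriple {
  enum Arch { x86, x86_64, other } TheArch;
  explicit MiniTriple(const std::string &TT) {
    if (TT.rfind("x86_64", 0) == 0) TheArch = x86_64;
    else if (TT.rfind("i", 0) == 0) TheArch = x86;
    else                            TheArch = other;
  }
  Arch getArch() const { return TheArch; }
};

// Before the patch: every helper took a string and re-parsed it.
static int dwarfFlavourOld(const std::string &TT) {
  MiniTriple T(TT); // re-parse on every call
  return T.getArch() == MiniTriple::x86_64 ? 0 : 1;
}

// After the patch: helpers take an already-parsed triple by reference.
static int dwarfFlavourNew(const MiniTriple &TT) {
  return TT.getArch() == MiniTriple::x86_64 ? 0 : 1;
}

int main() {
  MiniTriple T("x86_64-unknown-freebsd11.0"); // parsed exactly once
  return dwarfFlavourOld("x86_64-unknown-freebsd11.0") - dwarfFlavourNew(T);
}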
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index dcdae1dbc469..020803b57f76 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -52,26 +52,26 @@ namespace N86 {
}
namespace X86_MC {
- std::string ParseX86Triple(StringRef TT);
+std::string ParseX86Triple(const Triple &TT);
- unsigned getDwarfRegFlavour(Triple TT, bool isEH);
+unsigned getDwarfRegFlavour(const Triple &TT, bool isEH);
- void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
+void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
- /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
- /// do not need to go through TargetRegistry.
- MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS);
-}
+/// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+/// do not need to go through TargetRegistry.
+MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS);
+} // namespace X86_MC
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
/// Construct an X86 Windows COFF machine code streamer which will generate
/// PE/COFF format object files.
@@ -98,7 +98,7 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
-} // End llvm namespace
+} // namespace llvm
// Defines symbolic names for X86 registers. This defines a mapping from
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 95acc07192da..773fbf41a7b1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -69,7 +69,7 @@ public:
FixedValue);
}
};
-}
+} // namespace
static bool isFixupKindRIPRel(unsigned Kind) {
return Kind == X86::reloc_riprel_4byte ||
@@ -205,7 +205,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (Symbol->isTemporary() && Value) {
const MCSection &Sec = Symbol->getSection();
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
- Asm.addLocalUsedInReloc(*Symbol);
+ Symbol->setUsedInReloc();
}
RelSymbol = Asm.getAtom(*Symbol);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index bd1bc9943b6d..7d262cdbf51d 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
bool IsCrossSection,
const MCAsmBackend &MAB) const override;
};
-}
+} // namespace
X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 92f42b68ae51..dc6dd66bcd85 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -46,7 +46,7 @@ void X86WinCOFFStreamer::FinishImpl() {
MCWinCOFFStreamer::FinishImpl();
}
-}
+} // namespace
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS,
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index ef3318ba7580..1e7d94287c4a 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -431,4 +431,4 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-} // llvm namespace
+} // namespace llvm
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 14b69434806e..0139297fc72d 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,6 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-} // llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 8403ae6101df..80f457984951 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -80,6 +80,6 @@ FunctionPass *createX86WinEHStatePass();
/// must run after prologue/epilogue insertion and before lowering
/// the MachineInstr to MC.
FunctionPass *createX86ExpandPseudoPass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 64fc6d0d7e5c..205140144ab5 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -511,7 +511,7 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO())
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
@@ -585,7 +585,7 @@ void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
SmallString<128> Directive;
raw_svector_ostream OS(Directive);
StringRef Name = Sym->getName();
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isKnownWindowsMSVCEnvironment())
OS << " /EXPORT:";
@@ -610,7 +610,7 @@ void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
@@ -674,6 +674,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
SM.serializeToStackMapSection();
+ FM.serializeToFaultMapSection();
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
@@ -726,8 +727,10 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
- if (TT.isOSBinFormatELF())
+ if (TT.isOSBinFormatELF()) {
SM.serializeToStackMapSection();
+ FM.serializeToFaultMapSection();
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 3beeb1752bf5..acba21169c9c 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -12,6 +12,7 @@
#include "X86Subtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Target/TargetMachine.h"
@@ -27,6 +28,7 @@ class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
StackMaps SM;
+ FaultMaps FM;
void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
@@ -83,13 +85,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerFAULTING_LOAD_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
public:
explicit X86AsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), SM(*this), SMShadowTracker(TM) {}
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this),
+ SMShadowTracker(TM) {}
const char *getPassName() const override {
return "X86 Assembly / Object Emitter";
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 44121256ef00..6d6831b18b0a 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -99,7 +99,7 @@ private:
};
char X86CallFrameOptimization::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86CallFrameOptimization() {
return new X86CallFrameOptimization();
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index 0eb2494f1d63..a377eb6051ae 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -42,7 +42,7 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp
index 1b00997e7504..6a5a28e546f2 100644
--- a/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/lib/Target/X86/X86ExpandPseudo.cpp
@@ -84,19 +84,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
int StackAdj = StackAdjust.getImm();
if (StackAdj) {
- bool Is64Bit = STI->is64Bit();
- // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
- const bool Uses64BitFramePtr =
- STI->isTarget64BitLP64() || STI->isTargetNaCl64();
- // Check if we should use LEA for SP.
- bool UseLEAForSP = STI->useLeaForSP() &&
- X86FL->canUseLEAForSPInEpilogue(*MBB.getParent());
- unsigned StackPtr = TRI->getStackRegister();
// Check for possible merge with preceding ADD instruction.
- StackAdj += X86FrameLowering::mergeSPUpdates(MBB, MBBI, StackPtr, true);
- X86FrameLowering::emitSPUpdate(MBB, MBBI, StackPtr, StackAdj, Is64Bit,
- Uses64BitFramePtr, UseLEAForSP, *TII,
- *TRI);
+ StackAdj += X86FL->mergeSPUpdates(MBB, MBBI, true);
+ X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
}
// Jump to label or value in register.
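
This call site shrinks because X86FrameLowering now caches its subtarget-derived invariants at construction (see the new constructor in the X86FrameLowering.cpp diff below). A toy model of the refactor, not LLVM code, with hypothetical names:

#include <cstdint>
#include <cstdio>

struct MiniSubtarget {
  bool is64Bit() const { return true; }
};

class MiniFrameLowering {
  bool Is64Bit;      // cached once, as the real constructor now does
  unsigned StackPtr; // hypothetical stack-register id

public:
  explicit MiniFrameLowering(const MiniSubtarget &STI)
      : Is64Bit(STI.is64Bit()), StackPtr(Is64Bit ? 64 : 32) {}

  // Callers now pass only what varies; everything invariant comes from
  // the object, so the old six extra parameters disappear.
  void emitSPUpdate(int64_t NumBytes, bool InEpilogue) const {
    std::printf("adjust r%u by %lld (%s, epilogue=%d)\n", StackPtr,
                (long long)NumBytes, Is64Bit ? "64-bit" : "32-bit",
                (int)InEpilogue);
  }
};

int main() {
  MiniFrameLowering FL{MiniSubtarget{}};
  FL.emitSPUpdate(-8, /*InEpilogue=*/true); // mirrors the simplified call site
}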
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index b39c5aba30bf..8305a0454c80 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -44,7 +44,7 @@ class FixupLEAPass : public MachineFunctionPass {
/// \brief Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
/// try to replace it with an equivalent LEA instruction.
- /// If replacement succeeds, then also process the the newly created
+ /// If replacement succeeds, then also process the newly created
/// instruction.
void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI);
@@ -91,7 +91,7 @@ private:
const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
-}
+} // namespace
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 3b0bd03095a9..6f1d8e523732 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -279,7 +279,7 @@ namespace {
void setKillFlags(MachineBasicBlock &MBB) const;
};
char FPS::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
@@ -544,7 +544,7 @@ namespace {
return V < TE.from;
}
};
-}
+} // namespace
#ifndef NDEBUG
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index db58d9c5f301..85c5b6499131 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -37,6 +37,20 @@ using namespace llvm;
// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;
+X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
+ unsigned StackAlignOverride)
+ : TargetFrameLowering(StackGrowsDown, StackAlignOverride,
+ STI.is64Bit() ? -8 : -4),
+ STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
+ // Cache a bunch of frame-related predicates for this subtarget.
+ SlotSize = TRI->getSlotSize();
+ Is64Bit = STI.is64Bit();
+ IsLP64 = STI.isTarget64BitLP64();
+ // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
+ Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
+ StackPtr = TRI->getStackRegister();
+}
+
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects() &&
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
@@ -48,11 +62,9 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
- const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
- (MF.getSubtarget().getRegisterInfo());
return hasReservedCallFrame(MF) ||
- (hasFP(MF) && !TRI->needsStackRealignment(MF))
- || TRI->hasBasePointer(MF);
+ (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
+ TRI->hasBasePointer(MF);
}
// needsFrameIndexResolution - Do we need to perform FI resolution for
@@ -74,10 +86,9 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- RegInfo->needsStackRealignment(MF) ||
+ TRI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
@@ -137,7 +148,7 @@ static unsigned getLEArOpcode(unsigned IsLP64) {
/// to this register without worry about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const TargetRegisterInfo &TRI,
+ const TargetRegisterInfo *TRI,
bool Is64Bit) {
const MachineFunction *MF = MBB.getParent();
const Function *F = MF->getFunction();
@@ -176,7 +187,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Uses.insert(*AI);
}
@@ -203,23 +214,36 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
+/// Check whether or not the terminators of \p MBB needs to read EFLAGS.
+static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
+ for (const MachineInstr &MI : MBB.terminators()) {
+ bool BreakNext = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS)
+ continue;
+
+ // This terminator needs an eflag that is not defined
+ // by a previous terminator.
+ if (!MO.isDef())
+ return true;
+ BreakNext = true;
+ }
+ if (BreakNext)
+ break;
+ }
+ return false;
+}
+
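This helper moves here from the epilogue code (its old copy is deleted further down) so that BuildStackAdjustment can consult it. A standalone model of the same scan over a simplified operand list, illustration only:

#include <cassert>
#include <vector>

// Simplified model of terminatorsNeedFlagsAsInput(): walk the terminators'
// operands; a *use* of EFLAGS before any terminator-local def means flags
// produced earlier in the block are live into the terminators.
struct Operand { bool IsReg, IsEFLAGS, IsDef; };
using Instr = std::vector<Operand>;

static bool terminatorsReadFlags(const std::vector<Instr> &Terminators) {
  for (const Instr &MI : Terminators) {
    bool BreakNext = false;
    for (const Operand &MO : MI) {
      if (!MO.IsReg || !MO.IsEFLAGS)
        continue;
      if (!MO.IsDef)
        return true;  // reads flags defined before the terminators
      BreakNext = true; // this terminator redefines EFLAGS; stop scanning
    }
    if (BreakNext)
      break;
  }
  return false;
}

int main() {
  // A conditional jump that uses EFLAGS: adjusting SP with ADD would break it.
  std::vector<Instr> JCC = {{{true, true, /*IsDef=*/false}}};
  assert(terminatorsReadFlags(JCC));
  // A plain RET has no EFLAGS operand, so ADD is safe.
  std::vector<Instr> Ret = {{{true, false, false}}};
  assert(!terminatorsReadFlags(Ret));
}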
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes,
- bool Is64BitTarget, bool Is64BitStackPtr,
- bool UseLEA, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ int64_t NumBytes, bool InEpilogue) const {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc;
- if (UseLEA)
- Opc = getLEArOpcode(Is64BitStackPtr);
- else
- Opc = isSub
- ? getSUBriOpcode(Is64BitStackPtr, Offset)
- : getADDriOpcode(Is64BitStackPtr, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -231,17 +255,17 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
unsigned Reg = 0;
if (isSub && !isEAXLiveIn(*MBB.getParent()))
- Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
+ Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
else
- Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+ Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
if (Reg) {
- Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
+ unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
.addImm(Offset);
Opc = isSub
- ? getSUBrrOpcode(Is64BitTarget)
- : getADDrrOpcode(Is64BitTarget);
+ ? getSUBrrOpcode(Is64Bit)
+ : getADDrrOpcode(Is64Bit);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addReg(Reg);
@@ -252,15 +276,15 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
}
uint64_t ThisVal = std::min(Offset, Chunk);
- if (ThisVal == (Is64BitTarget ? 8 : 4)) {
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
// Use push / pop instead.
unsigned Reg = isSub
- ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
- : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
if (Reg) {
- Opc = isSub
- ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
- : (Is64BitTarget ? X86::POP64r : X86::POP32r);
+ unsigned Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
@@ -270,25 +294,59 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
}
}
- MachineInstr *MI = nullptr;
-
- if (UseLEA) {
- MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
- StackPtr, false, isSub ? -ThisVal : ThisVal);
- } else {
- MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- }
-
+ MachineInstrBuilder MI = BuildStackAdjustment(
+ MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
if (isSub)
- MI->setFlag(MachineInstr::FrameSetup);
+ MI.setMIFlag(MachineInstr::FrameSetup);
Offset -= ThisVal;
}
}
+MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ int64_t Offset, bool InEpilogue) const {
+ assert(Offset != 0 && "zero offset stack adjustment requested");
+
+ // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
+ // is tricky.
+ bool UseLEA;
+ if (!InEpilogue) {
+ UseLEA = STI.useLeaForSP();
+ } else {
+ // If we can use LEA for SP but we shouldn't, check that none
+ // of the terminators uses the eflags. Otherwise we will insert
+ // a ADD that will redefine the eflags and break the condition.
+ // Alternatively, we could move the ADD, but this may not be possible
+ // and is an optimization anyway.
+ UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
+ if (UseLEA && !STI.useLeaForSP())
+ UseLEA = terminatorsNeedFlagsAsInput(MBB);
+ // If that assert breaks, that means we do not do the right thing
+ // in canUseAsEpilogue.
+ assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) &&
+ "We shouldn't have allowed this insertion point");
+ }
+
+ MachineInstrBuilder MI;
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL,
+ TII.get(getLEArOpcode(Uses64BitFramePtr)),
+ StackPtr),
+ StackPtr, false, Offset);
+ } else {
+ bool IsSub = Offset < 0;
+ uint64_t AbsOffset = IsSub ? -Offset : Offset;
+ unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+ : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(AbsOffset);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+ return MI;
+}
+
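The InEpilogue split above exists because ADD/SUB on the stack pointer write EFLAGS while LEA does not: rsp += 16 can be encoded either as addq $16, %rsp (clobbers flags) or leaq 16(%rsp), %rsp (flags untouched). An epilogue whose terminators read flags, per the helper shown earlier, must therefore take the LEA form.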
/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -315,8 +373,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr,
- bool doMergeWithPrevious) {
+ bool doMergeWithPrevious) const {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
@@ -345,6 +402,15 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
+void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ MCCFIInstruction CFIInst) const {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst);
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -353,7 +419,6 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -366,11 +431,8 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
unsigned Reg = I->getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
- Offset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
}
}
@@ -394,10 +456,7 @@ static bool usesTheStack(const MachineFunction &MF) {
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL) {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- bool Is64Bit = STI.is64Bit();
+ DebugLoc DL) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
unsigned CallOp;
@@ -463,13 +522,10 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) {
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
-static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
+uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = getStackAlignment();
if (ForceStackAlign) {
if (MFI->hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
@@ -479,6 +535,22 @@ static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
return MaxAlign;
}
+void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ uint64_t MaxAlign) const {
+ uint64_t Val = -MaxAlign;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+}
+
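BuildStackAlignAND realigns the stack by AND-ing the stack pointer with -MaxAlign, which rounds it down to the next MaxAlign boundary; since the stack grows down, rounding down over-aligns rather than clobbering live data. A one-line check of that arithmetic, assuming a power-of-two alignment and a made-up pointer value:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t MaxAlign = 32;   // must be a power of two
  uint64_t SP = 0x7fffffffe9c8;   // hypothetical unaligned stack pointer
  uint64_t Aligned = SP & ~(MaxAlign - 1); // same bits as SP & -MaxAlign
  assert(Aligned == (SP & (0ULL - MaxAlign)));
  assert(Aligned % MaxAlign == 0 && Aligned <= SP);
}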
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -565,40 +637,32 @@ static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
void X86FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
+ "MF used frame lowering for wrong subtarget");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
- bool Is64Bit = STI.is64Bit();
- // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
- // Not necessarily synonymous with IsWin64.
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
+ bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry();
bool NeedsDwarfCFI =
- !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
- bool UseLEA = STI.useLeaForSP();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+ unsigned FramePtr = TRI->getFrameRegister(MF);
const unsigned MachineFramePtr =
STI.isTarget64BitILP32()
? getX86SubSuperRegister(FramePtr, MVT::i64, false)
: FramePtr;
- unsigned StackPtr = RegInfo->getStackRegister();
- unsigned BasePtr = RegInfo->getBaseRegister();
+ unsigned BasePtr = TRI->getBaseRegister();
DebugLoc DL;
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta && IsWinEH)
+ if (TailCallReturnAddrDelta && IsWin64Prologue)
report_fatal_error("Can't handle guaranteed tail call under win64 yet");
if (TailCallReturnAddrDelta < 0)
@@ -621,10 +685,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
- !RegInfo->needsStackRealignment(MF) &&
+ !TRI->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
+ !IsWin64CC && // Win64 has no Red Zone
!usesTheStack(MF) && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@@ -637,14 +701,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// applies to tail call optimized functions where the callee argument stack
// size is bigger than the callers.
if (TailCallReturnAddrDelta < 0) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta)
+ BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
+ /*InEpilogue=*/false)
.setMIFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
// Mapping for machine moves:
@@ -674,7 +733,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
// Callee-saved registers are pushed on stack before the stack is realigned.
- if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
+ if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);
// Get the offset of the stack slot for the EBP register, which is
@@ -691,27 +750,22 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Mark the place where EBP/RBP was saved.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
// Change the rule for the FramePtr to be an "offset" rule.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr,
- DwarfFramePtr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
+ nullptr, DwarfFramePtr, 2 * stackGrowth));
}
- if (NeedsWinEH) {
+ if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
.addImm(FramePtr)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (!IsWinEH) {
+ if (!IsWin64Prologue) {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
@@ -723,11 +777,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
}
// Mark the FramePtr as live-in in every block.
@@ -752,14 +804,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Mark callee-saved push instruction.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
StackOffset += stackGrowth;
}
- if (NeedsWinEH) {
+ if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
MachineInstr::FrameSetup);
}
@@ -768,24 +818,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Don't do this for Win64, it needs to realign the stack after the prologue.
- if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
+ if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
// the two. This can be the case when tail call elimination is enabled and
// the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ NumBytes -= mergeSPUpdates(MBB, MBBI, true);
// Adjust stack pointer: ESP -= numbytes.
@@ -798,7 +839,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
uint64_t AlignedNumBytes = NumBytes;
- if (IsWinEH && RegInfo->needsStackRealignment(MF))
+ if (IsWin64Prologue && TRI->needsStackRealignment(MF))
AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
// Check whether EAX is livein for this function.
@@ -859,17 +900,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
MBB.insert(MBBI, MI);
}
} else if (NumBytes) {
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
- UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
}
- if (NeedsWinEH && NumBytes)
+ if (NeedsWinCFI && NumBytes)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
.addImm(NumBytes)
.setMIFlag(MachineInstr::FrameSetup);
int SEHFrameOffset = 0;
- if (IsWinEH && HasFP) {
+ if (IsWin64Prologue && HasFP) {
SEHFrameOffset = calculateSetFPREG(NumBytes);
if (SEHFrameOffset)
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
@@ -877,7 +917,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
else
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);
- if (NeedsWinEH)
+ if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
.addImm(FramePtr)
.addImm(SEHFrameOffset)
@@ -888,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
const MachineInstr *FrameInstr = &*MBBI;
++MBBI;
- if (NeedsWinEH) {
+ if (NeedsWinCFI) {
int FI;
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
@@ -904,32 +944,23 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (NeedsWinEH)
+ if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
.setMIFlag(MachineInstr::FrameSetup);
// Realign stack after we spilled callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Win64 requires aligning the stack after the prologue.
- if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
+ if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
}
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
// to reference locals.
- if (RegInfo->hasBasePointer(MF)) {
+ if (TRI->hasBasePointer(MF)) {
// Update the base pointer with the current stack pointer.
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
@@ -950,12 +981,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr,
- -StackSize + stackGrowth));
-
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
+ nullptr, -StackSize + stackGrowth));
}
// Emit DWARF info specifying the offsets of the callee-saved registers.
@@ -975,65 +1002,24 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue(
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}
-/// Check whether or not the terminators of \p MBB needs to read EFLAGS.
-static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
- for (const MachineInstr &MI : MBB.terminators()) {
- bool BreakNext = false;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg != X86::EFLAGS)
- continue;
-
- // This terminator needs an eflag that is not defined
- // by a previous terminator.
- if (!MO.isDef())
- return true;
- BreakNext = true;
- }
- if (BreakNext)
- break;
- }
- return false;
-}
-
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- bool Is64Bit = STI.is64Bit();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
const bool Is64BitILP32 = STI.isTarget64BitILP32();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned FramePtr = TRI->getFrameRegister(MF);
unsigned MachineFramePtr =
Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
: FramePtr;
- unsigned StackPtr = RegInfo->getStackRegister();
-
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
- bool UseLEAForSP = canUseLEAForSPInEpilogue(MF);
- // If we can use LEA for SP but we shouldn't, check that none
- // of the terminators uses the eflags. Otherwise we will insert
- // a ADD that will redefine the eflags and break the condition.
- // Alternatively, we could move the ADD, but this may not be possible
- // and is an optimization anyway.
- if (UseLEAForSP && !MF.getSubtarget<X86Subtarget>().useLeaForSP())
- UseLEAForSP = terminatorsNeedFlagsAsInput(MBB);
- // If that assert breaks, that means we do not do the right thing
- // in canUseAsEpilogue.
- assert((UseLEAForSP || !terminatorsNeedFlagsAsInput(MBB)) &&
- "We shouldn't have allowed this insertion point");
+
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsWinCFI =
+ IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
@@ -1048,7 +1034,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Callee-saved registers were pushed on stack before the stack was
// realigned.
- if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
+ if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
// Pop EBP.
@@ -1083,11 +1069,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
// realigned.
- if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
- if (RegInfo->needsStackRealignment(MF))
+ if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ if (TRI->needsStackRealignment(MF))
MBBI = FirstCSPop;
unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
- uint64_t LEAAmount = IsWinEH ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
+ uint64_t LEAAmount =
+ IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
// There are only two legal forms of epilogue:
// - add SEHAllocationSize, %rsp
@@ -1109,8 +1096,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
- UseLEAForSP, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
--MBBI;
}
@@ -1120,7 +1106,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// into the epilogue. To cope with that, we insert an epilogue marker here,
// then replace it with a 'nop' if it ends up immediately after a CALL in the
// final emitted code.
- if (NeedsWinEH)
+ if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
// Add the return addr area delta back since we are not tail calling.
@@ -1130,16 +1116,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MBBI = MBB.getFirstTerminator();
// Check for possible merge with preceding ADD instruction.
- Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
- UseLEAForSP, TII, *RegInfo);
+ Offset += mergeSPUpdates(MBB, MBBI, true);
+ emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true);
}
}
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Offset will hold the offset from the stack pointer at function entry to the
// object.
@@ -1149,12 +1132,11 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI->getStackSize();
- unsigned SlotSize = RegInfo->getSlotSize();
bool HasFP = hasFP(MF);
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;
- if (IsWinEH) {
+ if (IsWin64Prologue) {
assert(!MFI->hasCalls() || (StackSize % 16) == 8);
// Calculate required stack adjustment.
@@ -1178,7 +1160,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
}
- if (RegInfo->hasBasePointer(MF)) {
+ if (TRI->hasBasePointer(MF)) {
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
if (FI < 0) {
// Skip the saved EBP.
@@ -1187,7 +1169,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
- } else if (RegInfo->needsStackRealignment(MF)) {
+ } else if (TRI->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
return Offset + SlotSize + FPDelta;
@@ -1214,17 +1196,15 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else if (RegInfo->needsStackRealignment(MF))
- FrameReg = RegInfo->getStackRegister();
+ if (TRI->hasBasePointer(MF))
+ FrameReg = TRI->getBaseRegister();
+ else if (TRI->needsStackRealignment(MF))
+ FrameReg = TRI->getStackRegister();
else
- FrameReg = RegInfo->getFrameRegister(MF);
+ FrameReg = TRI->getFrameRegister(MF);
return getFrameIndexOffset(MF, FI);
}
@@ -1235,8 +1215,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
const uint64_t StackSize = MFI->getStackSize();
{
#ifndef NDEBUG
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
// Note: LLVM arranges the stack as:
// Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
// > "Stack Slots" (<--SP)
@@ -1248,7 +1226,7 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
// frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
// AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
- assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+ assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
// We don't handle tail calls, and shouldn't be seeing them
// either.
@@ -1293,11 +1271,9 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+ assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
- FrameReg = RegInfo->getStackRegister();
+ FrameReg = TRI->getStackRegister();
return getFrameIndexOffsetFromSP(MF, FI);
}
@@ -1305,9 +1281,6 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeSavedFrameSize = 0;
@@ -1321,7 +1294,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// Since emitPrologue and emitEpilogue will handle spilling and restoring of
// the frame register, we can delete it from CSI list and not have to worry
// about avoiding it later.
- unsigned FPReg = RegInfo->getFrameRegister(MF);
+ unsigned FPReg = TRI->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
CSI.erase(CSI.begin() + i);
@@ -1352,7 +1325,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
- const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
// ensure alignment
SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
// spill into slot
@@ -1372,10 +1345,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
- MachineFunction &MF = *MBB.getParent();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
-
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -1419,10 +1388,6 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = MBB.findDebugLoc(MI);
- MachineFunction &MF = *MBB.getParent();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
-
// Reload XMMs from stack frame.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
@@ -1451,9 +1416,6 @@ void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -1473,8 +1435,8 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
// Spill the BasePtr if it's used.
- if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+ if (TRI->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(TRI->getBaseRegister());
}
static bool
@@ -1532,11 +1494,7 @@ static const uint64_t kSplitStackAvailable = 256;
void X86FrameLowering::adjustForSegmentedStacks(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
uint64_t StackSize;
- bool Is64Bit = STI.is64Bit();
- const bool IsLP64 = STI.isTarget64BitLP64();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
@@ -1782,12 +1740,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
- const bool Is64Bit = STI.is64Bit();
- const bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL;
// HiPE-specific values
const unsigned HipeLeafWords = 24;
@@ -1915,14 +1868,9 @@ void X86FrameLowering::adjustForHiPEPrologue(
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
- unsigned StackPtr = RegInfo.getStackRegister();
bool reserveCallFrame = hasReservedCallFrame(MF);
unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
@@ -1941,54 +1889,29 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = getStackAlignment();
Amount = RoundUpToAlignment(Amount, StackAlign);
- MachineInstr *New = nullptr;
-
// Factor out the amount that gets handled inside the sequence
// (Pushes of argument for frame setup, callee pops for frame destroy)
Amount -= InternalAmt;
if (Amount) {
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- unsigned Opc = getADDriOpcode(IsLP64, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- }
+ // Add Amount to SP to destroy a frame, and subtract it to set one up.
+ int Offset = isDestroy ? Amount : -Amount;
+ BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
}
-
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
- }
-
return;
}
- if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
+ if (isDestroy && InternalAmt) {
// If we are performing frame pointer elimination and if the callee pops
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
- unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(InternalAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
// We are not tracking the stack pointer adjustment by the callee, so make
// sure we restore the stack pointer immediately after the call; there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
while (I != B && !std::prev(I)->isCall())
--I;
- MBB.insert(I, New);
+ BuildStackAdjustment(MBB, I, DL, -InternalAmt, /*InEpilogue=*/false);
}
}
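The rewrite above funnels both pseudo expansions through BuildStackAdjustment.
A minimal sketch of the sign convention it encodes, assuming only the
immediate ADD/SUB forms matter (the real helper, declared in
X86FrameLowering.h below, may also use LEA); getADDriOpcode and
getSUBriOpcode are the file-local helpers visible in the removed lines:

    // Positive offsets tear a frame down (ADD to SP); negative ones set it
    // up (SUB from SP), matching the isDestroy ? Amount : -Amount above.
    static unsigned pickStackAdjustOpcode(int64_t Offset, bool IsLP64) {
      return Offset < 0 ? getSUBriOpcode(IsLP64, -Offset)
                        : getADDriOpcode(IsLP64, Offset);
    }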
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 5d03b4db45c1..2858e86cd0e0 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -18,16 +18,40 @@
namespace llvm {
+class MachineInstrBuilder;
+class MCCFIInstruction;
+class X86Subtarget;
+class X86RegisterInfo;
+
class X86FrameLowering : public TargetFrameLowering {
public:
- explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
- : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
+ X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride);
+
+ // Cached subtarget predicates.
+
+ const X86Subtarget &STI;
+ const TargetInstrInfo &TII;
+ const X86RegisterInfo *TRI;
+
+ unsigned SlotSize;
+
+ /// Is64Bit implies that x86_64 instructions are available.
+ bool Is64Bit;
+
+ bool IsLP64;
+
+ /// True if the 64-bit frame or stack pointer should be used. True for most
+ /// 64-bit targets with the exception of x32. If this is false, 32-bit
+ /// instruction operands should be used to manipulate StackPtr and FramePtr.
+ bool Uses64BitFramePtr;
+
+ unsigned StackPtr;
/// Emit a call to the target's stack probe function. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
/// the number of bytes to probe in RAX/EAX.
- static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL);
+ void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL) const;
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -83,18 +107,13 @@ public:
/// it is an ADD/SUB/LEA instruction it is deleted, and the
/// stack adjustment is returned as a positive value for ADD/LEA and
/// a negative one for SUB.
- static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, bool doMergeWithPrevious);
+ int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ bool doMergeWithPrevious) const;
/// Emit a series of instructions to increment / decrement the stack
/// pointer by a constant value.
- static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
- int64_t NumBytes, bool Is64BitTarget,
- bool Is64BitStackPtr, bool UseLEA,
- const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI);
+ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, bool InEpilogue) const;
/// Check that LEA can be used on SP in an epilogue sequence for \p MF.
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
@@ -115,8 +134,25 @@ private:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
uint64_t Amount) const;
+
+ uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
+
+ /// Wraps up getting a CFI index and building a MachineInstr for it.
+ void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, MCCFIInstruction CFIInst) const;
+
+ /// Aligns the stack pointer by ANDing it with -MaxAlign.
+ void BuildStackAlignAND(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ uint64_t MaxAlign) const;
+
+ /// Adjusts the stack pointer using LEA, SUB, or ADD.
+ MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, int64_t Offset,
+ bool InEpilogue) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
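The BuildStackAlignAND helper declared above relies on the usual
power-of-two identity; a self-contained scalar model, assuming MaxAlign is a
power of two (as LLVM stack alignments are):

    #include <cstdint>
    // SP & -MaxAlign rounds the stack pointer down to a MaxAlign boundary.
    uint64_t alignSPDown(uint64_t SP, uint64_t MaxAlign) {
      return SP & ~(MaxAlign - 1); // equivalent to SP & -MaxAlign
    }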
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index de591091f1ae..f6785e161188 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -138,7 +138,7 @@ namespace {
}
#endif
};
-}
+} // namespace
namespace {
//===--------------------------------------------------------------------===//
@@ -310,7 +310,7 @@ namespace {
return true;
}
};
-}
+} // namespace
bool
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e3ec288a683e..ce1ca20ee81a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -915,6 +915,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
// As there is no 64-bit GPR available, we need build a special custom
@@ -2233,7 +2235,9 @@ static bool IsCCallConvention(CallingConv::ID CC) {
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
+ auto Attr =
+ CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
+ if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
CallSite CS(CI);
@@ -2762,8 +2766,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
+ auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
- if (MF.getTarget().Options.DisableTailCalls)
+ if (Attr.getValueAsString() == "true")
isTailCall = false;
if (Subtarget->isPICStyleGOT() &&
@@ -5441,7 +5446,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
///
/// Otherwise, the first horizontal binop dag node takes as input the lower
/// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal binop
-/// dag node takes the the upper 128-bit of V0 and the upper 128-bit of V1.
+/// dag node takes the upper 128 bits of V0 and the upper 128 bits of V1.
/// Example:
/// HADD V0_LO, V1_LO
/// HADD V0_HI, V1_HI
@@ -6353,7 +6358,7 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for
-/// shuffling 8 lanes.
+/// shuffling 8 lanes.
static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
SelectionDAG &DAG) {
assert(Mask.size() <= 8 &&
@@ -9380,6 +9385,30 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
DAG.getConstant(PermMask, DL, MVT::i8));
}
+/// \brief Handle lowering 4-lane 128-bit shuffles.
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> WidenedMask,
+ SelectionDAG &DAG) {
+
+ assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
+ // Form a 128-bit permutation: convert the 64-bit shuffle mask selection
+ // values into the 128-bit selection bits defined by a vshuf64x2
+ // instruction's immediate control byte.
+ unsigned PermMask = 0, Imm = 0;
+
+ for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+ if (WidenedMask[i] == SM_SentinelZero)
+ return SDValue();
+
+ // Use the first element in place of an undef mask.
+ Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+ PermMask |= (Imm % 4) << (i * 2);
+ }
+
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, DL, MVT::i8));
+}
+
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
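A standalone model of the immediate computation in lowerV4X128VectorShuffle
above; the sentinel values are assumptions matching LLVM's shuffle-mask
conventions (SM_SentinelUndef = -1, SM_SentinelZero = -2):

    #include <cstdint>
    #include <optional>

    // Two selection bits per 128-bit destination lane, as vshuf64x2 expects.
    std::optional<uint8_t> shuf128Immediate(const int WidenedMask[4]) {
      const int SM_SentinelUndef = -1, SM_SentinelZero = -2;
      uint8_t PermMask = 0;
      for (int i = 0; i < 4; ++i) {
        if (WidenedMask[i] == SM_SentinelZero)
          return std::nullopt; // zeroing cannot be encoded in the immediate
        int Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
        PermMask |= (Imm % 4) << (i * 2);
      }
      return PermMask;
    }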
@@ -10173,6 +10202,10 @@ static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ if (SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
+ return Op;
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
@@ -11023,9 +11056,8 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
if (Idx2->getZExtValue() == 0) {
SDValue Ops[] = { SubVec2, SubVec };
- SDValue LD = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false);
- if (LD.getNode())
- return LD;
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
}
}
}
@@ -11617,15 +11649,21 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.isVector()) {
+ if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
+ return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ DAG.getUNDEF(SrcVT)));
+ }
if (SrcVT.getVectorElementType() == MVT::i1) {
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
- Op.getOperand(0)));
+ DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
}
return SDValue();
}
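The new v2i32 path widens the source to v4i32 (upper half undef) so it can
use cvtdq2pd, which converts only the low two i32 lanes of its source; a
scalar sketch of that semantics:

    #include <cstdint>
    // CVTDQ2PD reads four i32 lanes but produces doubles from the low two,
    // which is why the widened upper half may be left undef.
    void cvtdq2pd(const int32_t In[4], double Out[2]) {
      Out[0] = static_cast<double>(In[0]);
      Out[1] = static_cast<double>(In[1]); // In[2] and In[3] are ignored
    }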
@@ -13018,11 +13056,11 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
RecipOp = "vec-sqrtf";
else
return SDValue();
-
+
TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
if (!Recips.isEnabled(RecipOp))
return SDValue();
-
+
RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = false;
return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
@@ -13035,7 +13073,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
unsigned &RefinementSteps) const {
EVT VT = Op.getValueType();
const char *RecipOp;
-
+
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
@@ -13050,7 +13088,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
RecipOp = "vec-divf";
else
return SDValue();
-
+
TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
if (!Recips.isEnabled(RecipOp))
return SDValue();
@@ -13236,13 +13274,13 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(-1, dl, VT));
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE:
- // (x != y) -> ~(x ^ y)
+ case ISD::SETEQ:
+ // (x == y) -> ~(x ^ y)
return DAG.getNode(ISD::XOR, dl, VT,
DAG.getNode(ISD::XOR, dl, VT, Op0, Op1),
DAG.getConstant(-1, dl, VT));
- case ISD::SETEQ:
- // (x == y) -> (x ^ y)
+ case ISD::SETNE:
+ // (x != y) -> (x ^ y)
return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1);
case ISD::SETUGT:
case ISD::SETGT:
@@ -15107,7 +15145,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(), Src, RoundingMode),
Mask, PassThru, Subtarget, DAG);
@@ -15687,14 +15725,49 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
+static SDValue LowerEXCEPTIONINFO(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc dl(Op);
+ SDValue FnOp = Op.getOperand(2);
+ SDValue FPOp = Op.getOperand(3);
+
+ // Compute the symbol for the parent EH registration. We know it'll get
+ // emitted later.
+ auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(FnOp)->getGlobal());
+ MCSymbol *ParentFrameSym =
+ MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()));
+ StringRef Name = ParentFrameSym->getName();
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
+
+ // Create a TargetExternalSymbol for the label to avoid any target lowering
+ // that would make this PC relative.
+ MVT PtrVT = Op.getSimpleValueType();
+ SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, FPOp, OffsetVal);
+
+ // Load the second field of the struct, which is 4 bytes in. See
+ // WinEHStatePass for more info.
+ Add = DAG.getNode(ISD::ADD, dl, PtrVT, Add, DAG.getConstant(4, dl, PtrVT));
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Add, MachinePointerInfo(),
+ false, false, false, 0);
+}
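LowerEXCEPTIONINFO above reaches the parent's EH registration at
FP + ParentFrameOffset and loads the field 4 bytes in. One plausible layout
is sketched below purely for illustration (an assumption: 32-bit x86 with
4-byte pointers is implied, and WinEHStatePass defines the real record):

    // Hypothetical shape of the in-frame registration object.
    struct EHRegistrationNode {
      EHRegistrationNode *Next; // offset 0: SEH registration chain link
      void *SecondField;        // offset 4: what the lowering loads
    };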
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
- if (!IntrData)
+ if (!IntrData) {
+ if (IntNo == Intrinsic::x86_seh_exceptioninfo)
+ return LowerEXCEPTIONINFO(Op, Subtarget, DAG);
return SDValue();
+ }
SDLoc dl(Op);
switch(IntrData->Type) {
@@ -16464,6 +16537,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ SDValue AhiBlo = Ahi;
+ SDValue AloBhi = Bhi;
// Bit cast to 32-bit vectors for MULUDQ
EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
@@ -16473,11 +16548,15 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
Bhi = DAG.getBitcast(MulVT, Bhi);
SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
- SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
- SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
-
- AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
- AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ // After shifting constant values right, the result may be all zeros.
+ if (!ISD::isBuildVectorAllZeros(Ahi.getNode())) {
+ AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ }
+ if (!ISD::isBuildVectorAllZeros(Bhi.getNode())) {
+ AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ }
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
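A scalar model of the 64-bit multiply decomposition above, showing why each
cross term can be skipped when the corresponding high half is known zero
(the all-zeros checks on Ahi and Bhi):

    #include <cstdint>
    // 64x64 -> 64 multiply from three 32x32 -> 64 products, the widening
    // multiply PMULUDQ provides per lane; the hi*hi term vanishes mod 2^64.
    uint64_t mul64ViaMuludq(uint64_t A, uint64_t B) {
      uint64_t Alo = static_cast<uint32_t>(A), Ahi = A >> 32;
      uint64_t Blo = static_cast<uint32_t>(B), Bhi = B >> 32;
      uint64_t AloBlo = Alo * Blo;
      uint64_t AloBhi = (Alo * Bhi) << 32; // skipped when Bhi == 0
      uint64_t AhiBlo = (Ahi * Blo) << 32; // skipped when Ahi == 0
      return AloBlo + AloBhi + AhiBlo;
    }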
@@ -16992,36 +17071,111 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
}
- if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
- // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
- Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, dl, VT));
-
- SDValue VSelM = DAG.getConstant(0x80, dl, VT);
- SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
-
- // r = VSELECT(r, shl(r, 4), a);
- SDValue M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(4, dl, VT));
- R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
-
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
- OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+ if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ unsigned ShiftOpcode = Op->getOpcode();
- // r = VSELECT(r, shl(r, 2), a);
- M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(2, dl, VT));
- R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+ auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
+ // On SSE41 targets we make use of the fact that VSELECT lowers
+ // to PBLENDVB which selects bytes based just on the sign bit.
+ if (Subtarget->hasSSE41()) {
+ V0 = DAG.getBitcast(VT, V0);
+ V1 = DAG.getBitcast(VT, V1);
+ Sel = DAG.getBitcast(VT, Sel);
+ return DAG.getBitcast(SelVT,
+ DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1));
+ }
+ // On pre-SSE41 targets we test for the sign bit by comparing to
+ // zero - a negative value will set all bits of the lanes to true
+ // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
+ SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+ SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
+ return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1);
+ };
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
- OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+ // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
+ // We can safely do this using i16 shifts as we're only interested in
+ // the 3 lower bits of each byte.
+ Amt = DAG.getBitcast(ExtVT, Amt);
+ Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
+ Amt = DAG.getBitcast(VT, Amt);
+
+ if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
+ // r = VSELECT(r, shift(r, 4), a);
+ SDValue M =
+ DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ R = SignBitSelect(VT, Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // r = VSELECT(r, shift(r, 2), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ R = SignBitSelect(VT, Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // return VSELECT(r, shift(r, 1), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ R = SignBitSelect(VT, Amt, M, R);
+ return R;
+ }
- // return VSELECT(r, r+r, a);
- R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
- DAG.getNode(ISD::ADD, dl, VT, R, R), R);
- return R;
+ if (Op->getOpcode() == ISD::SRA) {
+ // For SRA we need to unpack each byte to the higher byte of a i16 vector
+ // so we can correctly sign extend. We don't care what happens to the
+ // lower byte.
+ SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
+ ALo = DAG.getBitcast(ExtVT, ALo);
+ AHi = DAG.getBitcast(ExtVT, AHi);
+ RLo = DAG.getBitcast(ExtVT, RLo);
+ RHi = DAG.getBitcast(ExtVT, RHi);
+
+ // r = VSELECT(r, shift(r, 4), a);
+ SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
+ DAG.getConstant(4, dl, ExtVT));
+ SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
+ DAG.getConstant(4, dl, ExtVT));
+ RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
+ RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
+
+ // a += a
+ ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
+ AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
+
+ // r = VSELECT(r, shift(r, 2), a);
+ MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
+ DAG.getConstant(2, dl, ExtVT));
+ MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
+ DAG.getConstant(2, dl, ExtVT));
+ RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
+ RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
+
+ // a += a
+ ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
+ AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
+
+ // r = VSELECT(r, shift(r, 1), a);
+ MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
+ DAG.getConstant(1, dl, ExtVT));
+ MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
+ DAG.getConstant(1, dl, ExtVT));
+ RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
+ RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
+
+ // Logical shift the result back to the lower byte, leaving a zero upper
+ // byte, meaning that we can safely pack with PACKUSWB.
+ RLo =
+ DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
+ RHi =
+ DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ }
}
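A per-byte scalar model of the SHL/SRL select ladder above (the SRA path is
the same ladder run on unpacked i16 lanes, and the v8i16 path added below
widens it to steps of 8/4/2/1 with the amount pre-shifted by 12): the 3-bit
amount is moved up to the sign bit with << 5, and each step conditionally
applies a shift of 4, 2, then 1.

    #include <cstdint>
    #include <initializer_list>
    // One byte lane of the ladder; PBLENDVB selects on bit 7 of Sel.
    uint8_t shiftByteViaLadder(uint8_t R, uint8_t Amt, bool IsLeft) {
      uint8_t Sel = Amt << 5; // bits 2..0 of Amt now occupy bits 7..5
      for (unsigned Step : {4u, 2u, 1u}) {
        uint8_t Shifted = IsLeft ? uint8_t(R << Step) : uint8_t(R >> Step);
        if (Sel & 0x80) // the VSELECT on the sign bit
          R = Shifted;
        Sel += Sel;     // a += a
      }
      return R;
    }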
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
@@ -17055,6 +17209,67 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
+ if (VT == MVT::v8i16) {
+ unsigned ShiftOpcode = Op->getOpcode();
+
+ auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
+ // On SSE41 targets we make use of the fact that VSELECT lowers
+ // to PBLENDVB which selects bytes based just on the sign bit.
+ if (Subtarget->hasSSE41()) {
+ MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
+ V0 = DAG.getBitcast(ExtVT, V0);
+ V1 = DAG.getBitcast(ExtVT, V1);
+ Sel = DAG.getBitcast(ExtVT, Sel);
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::VSELECT, dl, ExtVT, Sel, V0, V1));
+ }
+ // On pre-SSE41 targets we splat the sign bit - a negative value will
+ // set all bits of the lanes to true and VSELECT uses that in
+ // its OR(AND(V0,C),AND(V1,~C)) lowering.
+ SDValue C =
+ DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
+ return DAG.getNode(ISD::VSELECT, dl, VT, C, V0, V1);
+ };
+
+ // Turn 'a' into a mask suitable for VSELECT: a = a << 12;
+ if (Subtarget->hasSSE41()) {
+ // On SSE41 targets we need to replicate the shift mask in both
+ // bytes for PBLENDVB.
+ Amt = DAG.getNode(
+ ISD::OR, dl, VT,
+ DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
+ DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
+ } else {
+ Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
+ }
+
+ // r = VSELECT(r, shift(r, 8), a);
+ SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // r = VSELECT(r, shift(r, 4), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // r = VSELECT(r, shift(r, 2), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // return VSELECT(r, shift(r, 1), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+ return R;
+ }
+
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
@@ -18290,6 +18505,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -18404,6 +18620,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
+ case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
+ case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
}
return nullptr;
}
@@ -19464,7 +19683,8 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
assert(!Subtarget->isTargetMachO());
- X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
+ Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI,
+ DL);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -24019,7 +24239,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
- EVT InVT = N0->getValueType(0);
+ EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
SDLoc DL(N);
@@ -24037,7 +24257,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
}
if (!DCI.isBeforeLegalizeOps()) {
- if (N0.getValueType() == MVT::i1) {
+ if (InVT == MVT::i1) {
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue AllOnes =
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
@@ -24048,7 +24268,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if (VT.isVector()) {
auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
- EVT InVT = N->getValueType(0);
+ EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
128 / InVT.getScalarSizeInBits());
SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
@@ -24470,18 +24690,19 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
- if (Res != SDValue())
+ if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
EVT InVT = Op0->getValueType(0);
- // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
+ // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
+ // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
+ if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
+ InVT == MVT::v8i16 || InVT == MVT::v4i16) {
SDLoc dl(N);
- MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
}
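The widened combine above is exact because every i8/i16 value converts
losslessly through i32; a minimal sketch of the equivalence (the names are
illustrative only):

    #include <cstdint>
    // sitofp(x : i16) == sitofp(sext x to i32): sign extension preserves
    // the integer value, so converting that value to f32 is unchanged.
    float cvtDirect(int16_t X)  { return static_cast<float>(X); }
    float cvtWidened(int16_t X) { return static_cast<float>(int32_t(X)); }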
@@ -24490,7 +24711,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
- EVT VT = Ld->getValueType(0);
+ EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16
if (N->getValueType(0) == MVT::f16)
@@ -24498,9 +24719,9 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
- !Subtarget->is64Bit() && VT == MVT::i64) {
+ !Subtarget->is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
- SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b5d062f72b24..9c98333776cf 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -218,7 +218,8 @@ namespace llvm {
// Integer add/sub with signed saturation.
ADDS,
SUBS,
-
+ // Unsigned integer average.
+ AVG,
/// Integer horizontal add.
HADD,
@@ -293,6 +294,9 @@ namespace llvm {
// Vector FP round.
VFPROUND,
+ // Vector signed integer to double.
+ CVTDQ2PD,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -417,6 +421,10 @@ namespace llvm {
COMPRESS,
EXPAND,
+ // Convert a signed/unsigned integer to a scalar floating-point value
+ // with a rounding mode.
+ SINT_TO_FP_RND,
+ UINT_TO_FP_RND,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -508,7 +516,7 @@ namespace llvm {
// have a memop! In fact, starting from ATOMADD64_DAG all opcodes will be
// treated as target memory ops!
};
- }
+ } // namespace X86ISD
/// Define some predicates that are used for node matching.
namespace X86 {
@@ -575,7 +583,7 @@ namespace llvm {
TO_ZERO = 3,
CUR_DIRECTION = 4
};
- }
+ } // namespace X86
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
@@ -1112,6 +1120,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-}
+} // namespace llvm
#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index c1d0aef07118..de6a83506b28 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1058,118 +1058,87 @@ def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
(VPERMILPDZri VR512:$src1, imm:$imm)>;
// -- VPERM2I - 3 source operands form --
-multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
- PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
+multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _> {
let Constraints = "$src1 = $dst" in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
- EVEX_4V;
-
- def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}}|"
- "$dst {${mask}}, $src2, $src3}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- RC:$src3),
- RC:$src1)))]>,
- EVEX_4V, EVEX_K;
-
- let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}} {z} |",
- "$dst {${mask}} {z}, $src2, $src3}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- RC:$src3),
- (OpVT (bitconvert
- (v16i32 immAllZerosV))))))]>,
- EVEX_4V, EVEX_KZ;
+ defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ AVX5128IBase;
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3))))]>, EVEX_4V;
+ let mayLoad = 1 in
+ defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ EVEX_4V, AVX5128IBase;
+ }
+}
+multiclass avx512_perm_3src_mb<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _> {
+ let mayLoad = 1, Constraints = "$src1 = $dst" in
+ defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (_.VT (OpNode _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
+ AVX5128IBase, EVEX_4V, EVEX_B;
+}
- def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}}|"
- "$dst {${mask}}, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)),
- RC:$src1)))]>,
- EVEX_4V, EVEX_K;
-
- let AddedComplexity = 10 in // Prefer over the rrkz variant
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}} {z}|"
- "$dst {${mask}} {z}, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)),
- (OpVT (bitconvert
- (v16i32 immAllZerosV))))))]>,
- EVEX_4V, EVEX_KZ;
+multiclass avx512_perm_3src_sizes<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
}
}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, loadv16i32,
- i512mem, X86VPermiv3, v16i32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, loadv8i64,
- i512mem, X86VPermiv3, v8i64, VK8WM>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, loadv16f32,
- i512mem, X86VPermiv3, v16f32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, loadv8f64,
- i512mem, X86VPermiv3, v8f64, VK8WM>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
- PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, ValueType OpVT, RegisterClass KRC,
- ValueType MaskVT, RegisterClass MRC> :
- avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
- OpVT, KRC> {
- def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
- VR512:$idx, VR512:$src1, VR512:$src2, -1)),
- (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
-
- def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
- VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
- (!cast<Instruction>(NAME#rrk) VR512:$src1,
- (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
-}
-
-defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, loadv16i32, i512mem,
- X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, loadv8i64, i512mem,
- X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, loadv16f32, i512mem,
- X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, loadv8f64, i512mem,
- X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_perm_3src_sizes_w<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasBWI] in
+ defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+ let Predicates = [HasBWI, HasVLX] in {
+ defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ }
+}
+defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3,
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3,
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3,
+ avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3,
+ avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@@ -2044,11 +2013,11 @@ defm : avx512_binop_pat<xor, KXORWrr>;
def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
(KXNORWrr VK16:$src1, VK16:$src2)>;
def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
- (KXNORBrr VK8:$src1, VK8:$src2)>;
+ (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>;
def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
- (KXNORDrr VK32:$src1, VK32:$src2)>;
+ (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>;
def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
- (KXNORQrr VK64:$src1, VK64:$src2)>;
+ (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>;
let Predicates = [NoDQI] in
def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
@@ -3157,7 +3126,8 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-
+defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode, bit IsCommutable = 0> {
@@ -3278,30 +3248,6 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMINSDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMINUDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMINSQZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMINUQZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
@@ -4191,29 +4137,72 @@ defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//
-multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm> {
-let hasSideEffects = 0 in {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ X86VectorVTInfo DstVT, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ let hasSideEffects = 0 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
+ (ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
- let mayLoad = 1 in
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src),
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
+ (ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
-} // hasSideEffects = 0
+ } // hasSideEffects = 0
+ let isCodeGenOnly = 1 in {
+ def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, SrcRC:$src2),
+ !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ SrcRC:$src2,
+ (i32 FROUND_CURRENT)))]>, EVEX_4V;
+
+ def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, x86memop:$src2),
+ !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ (ld_frag addr:$src2),
+ (i32 FROUND_CURRENT)))]>, EVEX_4V;
+ } // isCodeGenOnly = 1
+}
+
+multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ X86VectorVTInfo DstVT, string asm> {
+ def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
+ !strconcat(asm,
+ "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ SrcRC:$src2,
+ (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ X86VectorVTInfo DstVT, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
+ avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
+ VEX_LIG;
}
let Predicates = [HasAVX512] in {
-defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
- XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
- XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
-defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
- XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
- XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
+ v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
+ v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
+ XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
+ v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
+ XD, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
+ v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -4233,14 +4222,18 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
-defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
- XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
- XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
-defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
+defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32,
+ v4f32x_info, i32mem, loadi32,
+ "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+ v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
+ XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info,
+ i32mem, loadi32, "cvtusi2sd{l}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
- XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+ v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -4321,18 +4314,9 @@ let isCodeGenOnly = 1 in {
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
- defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
- int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
- SSE_CVT_Scalar, 0>, XS, EVEX_4V;
- defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
- int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
- SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
- defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
- int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
- SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
// Convert float/double to signed/unsigned int 32/64 with truncation
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 2056056d23a5..eb4dc48a7a65 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -179,6 +179,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
.addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index dfe58ef8067b..16ae77dd81a3 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -72,6 +72,9 @@ def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
+def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
+ SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -184,6 +187,7 @@ def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
+def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
@@ -350,6 +354,12 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 3>,
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
+
+def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
+def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 6b7a9299dcfb..4aa0ae6f1959 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3456,11 +3456,11 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool X86InstrInfo::AnalyzeBranchImpl(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
+
// Start from the bottom of the block and work up, examining the
// terminator instructions.
MachineBasicBlock::iterator I = MBB.end();
@@ -3558,6 +3558,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ CondBranches.push_back(I);
continue;
}
@@ -3595,11 +3596,90 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
+ CondBranches.push_back(I);
}
return false;
}
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ SmallVector<MachineInstr *, 4> CondBranches;
+ return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
+}
+
+bool X86InstrInfo::AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const {
+ using namespace std::placeholders;
+
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineInstr *, 4> CondBranches;
+ if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
+ AllowModify))
+ return true;
+
+ if (Cond.size() != 1)
+ return true;
+
+ assert(MBP.TrueDest && "expected!");
+
+ if (!MBP.FalseDest)
+ MBP.FalseDest = MBB.getNextNode();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ MachineInstr *ConditionDef = nullptr;
+ bool SingleUseCondition = true;
+
+ for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
+ if (I->modifiesRegister(X86::EFLAGS, TRI)) {
+ ConditionDef = &*I;
+ break;
+ }
+
+ if (I->readsRegister(X86::EFLAGS, TRI))
+ SingleUseCondition = false;
+ }
+
+ if (!ConditionDef)
+ return true;
+
+ if (SingleUseCondition) {
+ for (auto *Succ : MBB.successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
+ SingleUseCondition = false;
+ }
+
+ MBP.ConditionDef = ConditionDef;
+ MBP.SingleUseCondition = SingleUseCondition;
+
+ // Currently we only recognize the simple pattern:
+ //
+ // test %reg, %reg
+ // je %label
+ //
+ const unsigned TestOpcode =
+ Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
+
+ if (ConditionDef->getOpcode() == TestOpcode &&
+ ConditionDef->getNumOperands() == 3 &&
+ ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
+ (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
+ MBP.LHS = ConditionDef->getOperand(0);
+ MBP.RHS = MachineOperand::CreateImm(0);
+ MBP.Predicate = Cond[0].getImm() == X86::COND_NE
+ ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
+ return false;
+ }
+
+ return true;
+}
+
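
The pattern recognized above can be consumed through the new hook roughly as
follows (a minimal sketch, assuming TII and MBB come from a surrounding
MachineFunctionPass; this client code is not part of the patch):

    TargetInstrInfo::MachineBranchPredicate MBP;
    if (!TII->AnalyzeBranchPredicate(MBB, MBP, /*AllowModify=*/false)) {
      // Success: MBP.LHS is the tested register operand and MBP.RHS is the
      // immediate 0. PRED_EQ corresponds to je, PRED_NE to jne.
      bool TakenWhenZero =
          MBP.Predicate == TargetInstrInfo::MachineBranchPredicate::PRED_EQ;
      (void)TakenWhenZero; // e.g. branch to MBP.TrueDest when the reg is zero
    }
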
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -3622,8 +3702,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -3671,7 +3750,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
bool X86InstrInfo::
canInsertSelect(const MachineBasicBlock &MBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
@@ -3708,8 +3787,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
assert(Cond.size() == 1 && "Invalid Cond array");
@@ -3967,6 +4045,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
+bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *MemOp, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ const MCInstrDesc &Desc = MemOp->getDesc();
+ int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags, MemOp->getOpcode());
+ if (MemRefBegin < 0)
+ return false;
+
+ MemRefBegin += X86II::getOperandBias(Desc);
+
+ BaseReg = MemOp->getOperand(MemRefBegin + X86::AddrBaseReg).getReg();
+ if (MemOp->getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
+ return false;
+
+ if (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
+ X86::NoRegister)
+ return false;
+
+ const MachineOperand &DispMO = MemOp->getOperand(MemRefBegin + X86::AddrDisp);
+
+  // The displacement may be symbolic; only handle immediate displacements.
+ if (!DispMO.isImm())
+ return false;
+
+ Offset = DispMO.getImm();
+
+  // The index register was already checked to be X86::NoRegister above, so
+  // this is a plain base + displacement access.
+  return true;
+}
+
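
The hook above deliberately accepts only the simplest addressing mode: base
register plus immediate displacement, scale 1, no index. A hedged illustration
of a caller (MemOp, TII and TRI are assumed to be in scope):

    //   movl 16(%rdi), %eax      -> BaseReg = RDI, Offset = 16, returns true
    //   movl (%rdi,%rcx,4), %eax -> returns false (index register present)
    //   movl sym(%rdi), %eax     -> returns false (symbolic displacement)
    unsigned BaseReg, Offset;
    if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI)) {
      // MemOp is a plain [BaseReg + Offset] access.
    }
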
static unsigned getStoreRegOpcode(unsigned SrcReg,
const TargetRegisterClass *RC,
bool isStackAligned,
@@ -6219,13 +6327,217 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
}
bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
return isHighLatencyDef(DefMI->getOpcode());
}
+/// If the input instruction is part of a chain of dependent ops that are
+/// suitable for reassociation, return the earlier instruction in the sequence
+/// that defines its first operand; otherwise, return nullptr.
+/// If the instruction's operands must be commuted to be considered a
+/// reassociation candidate, Commuted will be set to true.
+static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
+ unsigned AssocOpcode,
+ bool checkPrevOneUse,
+ bool &Commuted) {
+ if (Inst.getOpcode() != AssocOpcode)
+ return nullptr;
+
+ MachineOperand Op1 = Inst.getOperand(1);
+ MachineOperand Op2 = Inst.getOperand(2);
+
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
+ return nullptr;
+
+ Commuted = false;
+ if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+ std::swap(MI1, MI2);
+ Commuted = true;
+ }
+
+ // Avoid reassociating operands when it won't provide any benefit. If both
+ // operands are produced by instructions of this type, we may already
+ // have the optimal sequence.
+ if (MI2->getOpcode() == AssocOpcode)
+ return nullptr;
+
+ // The instruction must only be used by the other instruction that we
+ // reassociate with.
+ if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+ return nullptr;
+
+ // We must match a simple chain of dependent ops.
+ // TODO: This check is not necessary for the earliest instruction in the
+ // sequence. Instead of a sequence of 3 dependent instructions with the same
+ // opcode, we only need to find a sequence of 2 dependent instructions with
+ // the same opcode plus 1 other instruction that adds to the height of the
+ // trace.
+ if (MI1->getOpcode() != AssocOpcode)
+ return nullptr;
+
+ return MI1;
+}
+
+/// Select a pattern based on how the operands of each associative operation
+/// need to be commuted.
+static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
+ bool CommuteRoot) {
+ if (CommutePrev) {
+ if (CommuteRoot)
+ return MachineCombinerPattern::MC_REASSOC_XA_YB;
+ return MachineCombinerPattern::MC_REASSOC_XA_BY;
+ } else {
+ if (CommuteRoot)
+ return MachineCombinerPattern::MC_REASSOC_AX_YB;
+ return MachineCombinerPattern::MC_REASSOC_AX_BY;
+ }
+}
+
+bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ // TODO: There are many more associative instruction types to match:
+ // 1. Other forms of scalar FP add (non-AVX)
+ // 2. Other data types (double, integer, vectors)
+ // 3. Other math / logic operations (mul, and, or)
+ unsigned AssocOpcode = X86::VADDSSrr;
+
+ // TODO: There is nothing x86-specific here except the instruction type.
+ // This logic could be hoisted into the machine combiner pass itself.
+ bool CommuteRoot;
+ if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true,
+ CommuteRoot)) {
+ bool CommutePrev;
+ if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP.
+ Patterns.push_back(getPattern(CommutePrev, CommuteRoot));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Attempt the following reassociation to reduce critical path length:
+/// B = A op X (Prev)
+/// C = B op Y (Root)
+/// ===>
+/// B = X op Y
+/// C = A op B
+static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+  unsigned OpIdx[4][4] = {
+    { 1, 1, 2, 2 }, // MC_REASSOC_AX_BY: Prev = A op X, Root = B op Y
+    { 1, 2, 2, 1 }, // MC_REASSOC_AX_YB: Prev = A op X, Root = Y op B
+    { 2, 1, 1, 2 }, // MC_REASSOC_XA_BY: Prev = X op A, Root = B op Y
+    { 2, 2, 1, 1 }  // MC_REASSOC_XA_YB: Prev = X op A, Root = Y op B
+  };
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ InsInstrs.push_back(MIB1);
+
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+ InsInstrs.push_back(MIB2);
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
+
+void X86InstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
+ Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
+ Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ else
+ llvm_unreachable("Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ return;
+}
+
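
To make the reassociation concrete, here is a hypothetical scalar-float view
of the MC_REASSOC_AX_BY case (only legal under the unsafe-FP-math check in
getMachineCombinerPatterns; the function names are illustrative):

    float before(float A, float X, float Y) {
      float B = A + X; // Prev
      float C = B + Y; // Root: C waits for B, a serial chain of two adds
      return C;
    }
    float after(float A, float X, float Y) {
      float B = X + Y; // independent of A's definition
      float C = A + B; // the critical path through A shrinks by one add
      return C;
    }
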
namespace {
/// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -6292,7 +6604,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char CGBR::ID = 0;
FunctionPass*
@@ -6404,7 +6716,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char LDTLSCleanup::ID = 0;
FunctionPass*
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index ac1b2d4fedc6..4912951140d9 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -26,6 +26,19 @@ namespace llvm {
class X86RegisterInfo;
class X86Subtarget;
+ namespace MachineCombinerPattern {
+ enum MC_PATTERN : int {
+ // These are commutative variants for reassociating a computation chain
+ // of the form:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+ MC_REASSOC_AX_BY = 0,
+ MC_REASSOC_AX_YB = 1,
+ MC_REASSOC_XA_BY = 2,
+ MC_REASSOC_XA_YB = 3,
+ };
+ } // end namespace MachineCombinerPattern
+
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
@@ -77,7 +90,7 @@ namespace X86 {
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(CondCode CC);
-} // end namespace X86;
+} // namespace X86
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -166,6 +179,12 @@ class X86InstrInfo final : public X86GenInstrInfo {
virtual void anchor();
+ bool AnalyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &CondBranches,
+ bool AllowModify) const;
+
public:
explicit X86InstrInfo(X86Subtarget &STI);
@@ -254,18 +273,23 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
+
+ bool getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const override;
+ bool AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+ TargetInstrInfo::MachineBranchPredicate &MBP,
+ bool AllowModify = false) const override;
+
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
- bool canInsertSelect(const MachineBasicBlock&,
- const SmallVectorImpl<MachineOperand> &Cond,
+ bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int&, int&, int&) const override;
void insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -423,12 +447,32 @@ public:
bool isHighLatencyDef(int opc) const override;
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
unsigned UseIdx) const override;
+
+ bool useMachineCombiner() const override {
+ return true;
+ }
+
+ /// Return true when there is potentially a faster code sequence
+ /// for an instruction chain ending in <Root>. All potential patterns are
+ /// output in the <Pattern> array.
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
+
+ /// When getMachineCombinerPatterns() finds a pattern, this function generates
+ /// the instructions that could replace the original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -468,6 +512,6 @@ private:
int &FrameIndex) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8294e38e9957..95629184f2cf 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2234,14 +2234,27 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
-// AVX 256-bit register conversion intrinsics
+// AVX register conversion intrinsics
let Predicates = [HasAVX] in {
+ def : Pat<(v2f64 (X86cvtdq2pd (v4i32 VR128:$src))),
+ (VCVTDQ2PDrr VR128:$src)>;
+ def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VCVTDQ2PDrm addr:$src)>;
+
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]
+// SSE2 register conversion intrinsics
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2f64 (X86cvtdq2pd (v4i32 VR128:$src))),
+ (CVTDQ2PDrr VR128:$src)>;
+ def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (loadv2i64 addr:$src)))),
+ (CVTDQ2PDrm addr:$src)>;
+} // Predicates = [HasSSE2]
+
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 0268066c2ba1..2b829301e327 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -242,6 +242,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
@@ -469,6 +476,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pandn_q_128, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
X86_INTRINSIC_DATA(avx512_mask_pandn_q_256, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
X86_INTRINSIC_DATA(avx512_mask_pandn_q_512, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_b_128, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_b_256, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_b_512, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_w_128, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_w_256, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_w_512, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
@@ -493,6 +506,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index ff1436af4ece..64135e0f53e5 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -50,6 +51,8 @@ class X86MCInstLower {
public:
X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
+ Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
+ const MachineOperand &MO) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
@@ -109,7 +112,7 @@ namespace llvm {
OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo());
}
-} // end llvm namespace
+} // namespace llvm
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
@@ -402,47 +405,43 @@ static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
}
+Optional<MCOperand>
+X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
+ const MachineOperand &MO) const {
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return None;
+ return MCOperand::createReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::createImm(MO.getImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ case MachineOperand::MO_JumpTableIndex:
+ return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
+ case MachineOperand::MO_ConstantPoolIndex:
+ return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(
+ MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return None;
+ }
+}
+
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
- MCOperand MCOp;
- switch (MO.getType()) {
- default:
- MI->dump();
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
- MCOp = MCOperand::createReg(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
- break;
- case MachineOperand::MO_JumpTableIndex:
- MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
- break;
- case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO,
- AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
- break;
- case MachineOperand::MO_RegisterMask:
- // Ignore call clobbers.
- continue;
- }
-
- OutMI.addOperand(MCOp);
- }
+ for (const MachineOperand &MO : MI->operands())
+ if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
+ OutMI.addOperand(MaybeMCOp.getValue());
// Handle a few special cases to eliminate operand modifiers.
ReSimplify:
@@ -865,6 +864,28 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
SM.recordStatepoint(MI);
}
+void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ // FAULTING_LOAD_OP <def>, <handler label>, <load opcode>, <load operands>
+
+ unsigned LoadDefRegister = MI.getOperand(0).getReg();
+ MCSymbol *HandlerLabel = MI.getOperand(1).getMCSymbol();
+ unsigned LoadOpcode = MI.getOperand(2).getImm();
+ unsigned LoadOperandsBeginIdx = 3;
+
+ FM.recordFaultingOp(FaultMaps::FaultingLoad, HandlerLabel);
+
+ MCInst LoadMI;
+ LoadMI.setOpcode(LoadOpcode);
+ LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+ for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
+ E = MI.operands_end();
+ I != E; ++I)
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, *I))
+ LoadMI.addOperand(MaybeOperand.getValue());
+
+ OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
+}
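
Given the operand layout in the comment above, a pseudo of the shape
"%eax = FAULTING_LOAD_OP <handler>, MOV32rm, <address operands>" is re-emitted
as the plain load it wraps, while the FaultMaps record lets the runtime map the
load's faulting PC back to <handler> (rendering hypothetical; the exact MIR
syntax is not shown in this patch).
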
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
@@ -1120,6 +1141,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(*MI, MCInstLowering);
+ case TargetOpcode::FAULTING_LOAD_OP:
+ return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*MI);
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index d598b55aae3e..342d26ab1fbb 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -179,6 +179,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 143e70bda9e7..33aa78ffdf8a 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -84,7 +84,7 @@ namespace {
};
char PadShortFunc::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86PadShortFunctions() {
return new PadShortFunc();
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index e9b6bfc3273c..00e213423974 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -419,6 +419,22 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
+ // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
+ // because the calling convention defines the EFLAGS register as NOT
+ // preserved.
+ //
+  // Unfortunately, EFLAGS can still show up as live-out after branch folding.
+  // Add an assert to track this, and clear the register afterwards to avoid
+  // unnecessary crashes during release builds.
+ assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
+ "EFLAGS are not live-out from a patchpoint.");
+
+  // Also clear other registers that don't need to be preserved (IP).
+ for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
+ Mask[Reg / 32] &= ~(1U << (Reg % 32));
+}
+
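
The mask layout assumed by the bit arithmetic above is one bit per register,
packed 32 registers to a uint32_t word. Restated as hedged helpers (a sketch
only, mirroring the expressions used in adjustStackMapLiveOutMask):

    static bool isLiveOutBitSet(const uint32_t *Mask, unsigned Reg) {
      return Mask[Reg / 32] & (1U << (Reg % 32)); // test Reg's live-out bit
    }
    static void clearLiveOutBit(uint32_t *Mask, unsigned Reg) {
      Mask[Reg / 32] &= ~(1U << (Reg % 32));      // clear Reg's live-out bit
    }
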
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -765,4 +781,4 @@ unsigned get512BitSuperRegister(unsigned Reg) {
llvm_unreachable("Unexpected SIMD register");
}
-}
+} // namespace llvm
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index a714c2a33d06..459ecf7fff72 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -104,6 +104,8 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
+
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -134,6 +136,6 @@ unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false)
//get512BitRegister - X86 utility - returns 512-bit super register
unsigned get512BitSuperRegister(unsigned Reg);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index eb7e0ed9de6c..25606d3f5df3 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -48,6 +48,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 74af29f4566c..3b25d30dc221 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -287,7 +287,7 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
@@ -300,8 +300,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
TargetTriple.getEnvironment() == Triple::CODE16),
TSInfo(*TM.getDataLayout()),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
- is64Bit() ? -8 : -4) {
+ FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (TM.getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index a476f7aba932..6934061c6922 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -253,9 +253,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const X86TargetMachine &TM,
- unsigned StackAlignOverride);
+ X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const X86TargetMachine &TM, unsigned StackAlignOverride);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -491,6 +490,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 646cff7c5bdb..3d6eb4f7ce02 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -24,6 +24,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(TheX86_32Target);
@@ -90,13 +94,14 @@ static std::string computeDataLayout(const Triple &TT) {
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
+X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, computeDataLayout(Triple(TT)), TT, CPU, FS, Options,
- RM, CM, OL),
- TLOF(createTLOF(Triple(getTargetTriple()))),
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OL),
+ TLOF(createTLOF(getTargetTriple())),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
@@ -213,7 +218,7 @@ bool X86PassConfig::addInstSelector() {
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For ELF, cleanup any local-dynamic TLS accesses.
- if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
+ if (TM->getTargetTriple().isOSBinFormatELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createCleanupLocalDynamicTLSPass());
@@ -224,12 +229,14 @@ bool X86PassConfig::addInstSelector() {
bool X86PassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
return true;
}
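
Since EnableMachineCombinerPass defaults to true, the combiner now runs as part
of the ILP optimizations; it should be possible to disable it for triage with
llc -x86-machine-combiner=false (the flag spelling follows from the cl::opt
declaration above).
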
bool X86PassConfig::addPreISel() {
// Only add this pass for 32-bit x86 Windows.
- Triple TT(TM->getTargetTriple());
+ const Triple &TT = TM->getTargetTriple();
if (TT.isOSWindows() && TT.getArch() == Triple::x86)
addPass(createX86WinEHStatePass());
return true;
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index c9833ed39e24..be56888b75f4 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -29,8 +29,8 @@ class X86TargetMachine final : public LLVMTargetMachine {
mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
public:
- X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~X86TargetMachine() override;
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
@@ -44,6 +44,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index bbfeba8b9d8d..13384fab5985 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -153,13 +153,13 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v4i64, 1 },
{ ISD::SRL, MVT::v4i64, 1 },
- { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
- { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
- { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
{ ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
@@ -253,19 +253,19 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// to ISel. The cost model must return worst case assumptions because it is
// used for vectorization and we don't want to make vectorized code worse
// than scalar code.
- { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
- { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
{ ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
- { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
- { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
- { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
+ { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
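
These entries feed the generic cost query; a hypothetical client call (Ctx and
TTI are assumed to be in scope):

    // The vectorizer weighs one vector shift against the scalar shifts it
    // replaces; SRL of v8i16 now costs 32 instead of the old 8*10 estimate.
    Type *V8I16 = VectorType::get(Type::getInt16Ty(Ctx), 8);
    unsigned VecCost = TTI.getArithmeticInstrCost(Instruction::LShr, V8I16);
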
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 6925b272b4a5..71ce45b0bc2e 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -86,7 +86,7 @@ namespace {
};
char VZeroUpperInserter::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index ce69ea721993..c9e80945549b 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -60,9 +60,10 @@ public:
private:
void emitExceptionRegistrationRecord(Function *F);
- void linkExceptionRegistration(IRBuilder<> &Builder, Value *Handler);
+ void linkExceptionRegistration(IRBuilder<> &Builder, Function *Handler);
void unlinkExceptionRegistration(IRBuilder<> &Builder);
void addCXXStateStores(Function &F, MachineModuleInfo &MMI);
+ void addSEHStateStores(Function &F, MachineModuleInfo &MMI);
void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo,
Function &F, int BaseState);
void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State);
@@ -104,7 +105,7 @@ private:
/// The linked list node subobject inside of RegNode.
Value *Link = nullptr;
};
-}
+} // namespace
FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
@@ -145,16 +146,10 @@ bool WinEHStatePass::runOnFunction(Function &F) {
return false;
// Check the personality. Do nothing if this is not an MSVC personality.
- LandingPadInst *LP = nullptr;
- for (BasicBlock &BB : F) {
- LP = BB.getLandingPadInst();
- if (LP)
- break;
- }
- if (!LP)
+ if (!F.hasPersonalityFn())
return false;
PersonalityFn =
- dyn_cast<Function>(LP->getPersonalityFn()->stripPointerCasts());
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
if (!PersonalityFn)
return false;
Personality = classifyEHPersonality(PersonalityFn);
@@ -171,8 +166,10 @@ bool WinEHStatePass::runOnFunction(Function &F) {
auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMIPtr && "MachineModuleInfo should always be available");
MachineModuleInfo &MMI = *MMIPtr;
- if (Personality == EHPersonality::MSVC_CXX) {
- addCXXStateStores(F, MMI);
+ switch (Personality) {
+ default: llvm_unreachable("unexpected personality function");
+ case EHPersonality::MSVC_CXX: addCXXStateStores(F, MMI); break;
+ case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, MMI); break;
}
// Reset per-function state.
@@ -258,7 +255,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
if (Personality == EHPersonality::MSVC_CXX) {
RegNodeTy = getCXXEHRegistrationType();
RegNode = Builder.CreateAlloca(RegNodeTy);
- // FIXME: We can skip this in -GS- mode, when we figure that out.
// SavedESP = llvm.stacksave()
Value *SP = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
@@ -360,11 +356,14 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
}
void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
- Value *Handler) {
+ Function *Handler) {
+ // Emit the .safeseh directive for this function.
+ Handler->addFnAttr("safeseh");
+
Type *LinkTy = getEHLinkRegistrationType();
// Handler = Handler
- Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
- Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1));
+ Value *HandlerI8 = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
+ Builder.CreateStore(HandlerI8, Builder.CreateStructGEP(LinkTy, Link, 1));
// Next = [fs:00]
Constant *FSZero =
Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
@@ -472,6 +471,74 @@ void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode,
}
}
+/// Assign every distinct landingpad a unique state number for SEH. Unlike C++
+/// EH, we can use this very simple algorithm while C++ EH cannot because catch
+/// handlers aren't outlined and the runtime doesn't have to figure out which
+/// catch handler frame to unwind to.
+/// FIXME: __finally blocks are outlined, so this approach may break down there.
+void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
+
+  // Remember the frame escape index we used. We save it in WinEHFuncInfo so
+ // that we can lower llvm.x86.seh.exceptioninfo later in filter functions
+ // without too much trouble.
+ int RegNodeEscapeIndex = escapeRegNode(F);
+ FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
+
+ // Iterate all the instructions and emit state number stores.
+ int CurState = 0;
+ SmallPtrSet<BasicBlock *, 4> ExceptBlocks;
+ for (BasicBlock &BB : F) {
+ for (auto I = BB.begin(), E = BB.end(); I != E; ++I) {
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ auto *Intrin = dyn_cast<IntrinsicInst>(CI);
+ if (Intrin) {
+          // Calls that "don't throw" are still considered able to throw
+          // asynchronous exceptions, but intrinsics cannot.
+ continue;
+ }
+ insertStateNumberStore(RegNode, CI, -1);
+ } else if (auto *II = dyn_cast<InvokeInst>(I)) {
+ // Look up the state number of the landingpad this unwinds to.
+ LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
+ auto InsertionPair =
+ FuncInfo.LandingPadStateMap.insert(std::make_pair(LPI, CurState));
+ auto Iter = InsertionPair.first;
+ int &State = Iter->second;
+ bool Inserted = InsertionPair.second;
+ if (Inserted) {
+ // Each action consumes a state number.
+ auto *EHActions = cast<IntrinsicInst>(LPI->getNextNode());
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ parseEHActions(EHActions, ActionList);
+ assert(!ActionList.empty());
+ CurState += ActionList.size();
+ State += ActionList.size() - 1;
+
+ // Remember all the __except block targets.
+ for (auto &Handler : ActionList) {
+ if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) {
+ auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc());
+ ExceptBlocks.insert(BA->getBasicBlock());
+ }
+ }
+ }
+ insertStateNumberStore(RegNode, II, State);
+ }
+ }
+ }
+
+ // Insert llvm.stackrestore into each __except block.
+ Function *StackRestore =
+ Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore);
+ for (BasicBlock *ExceptBB : ExceptBlocks) {
+ IRBuilder<> Builder(ExceptBB->begin());
+ Value *SP =
+ Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ Builder.CreateCall(StackRestore, {SP});
+ }
+}
+
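
A hypothetical source-level picture of the numbering above (comments only;
__try/__except is the MSVC extension this personality covers):

    // void f() {
    //   g();              // plain call: state -1 is stored before it
    //   __try {
    //     mayFault();     // invoke: the landingpad's state (e.g. 0) is stored
    //   } __except (1) {
    //     // __except target: an llvm.stackrestore of SavedESP is added here
    //   }
    // }
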
void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode,
Instruction *IP, int State) {
IRBuilder<> Builder(IP);