aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp30
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp56
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h70
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp124
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h21
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp75
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp9
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h8
-rw-r--r--lib/Target/PowerPC/PPC.h26
-rw-r--r--lib/Target/PowerPC/PPC.td149
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp525
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp17
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp103
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td68
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp20
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp250
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h21
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp333
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1260
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h191
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td680
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td635
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td22
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp268
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td932
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp19
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp9
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h31
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp414
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h48
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td176
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td15
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td15
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp23
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h30
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp18
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h20
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp240
-rw-r--r--lib/Target/PowerPC/README.txt21
-rw-r--r--lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
47 files changed, 4643 insertions, 2361 deletions
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 192d18d66440..6036428fad93 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
+ PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 3d583060d1ef..bacc108c62b4 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
- if (!Modifier) {
- unsigned CCReg = MI->getOperand(OpNo+1).getReg();
- unsigned RegNo;
- switch (CCReg) {
- default: llvm_unreachable("Unknown CR register");
- case PPC::CR0: RegNo = 0; break;
- case PPC::CR1: RegNo = 1; break;
- case PPC::CR2: RegNo = 2; break;
- case PPC::CR3: RegNo = 3; break;
- case PPC::CR4: RegNo = 4; break;
- case PPC::CR5: RegNo = 5; break;
- case PPC::CR6: RegNo = 6; break;
- case PPC::CR7: RegNo = 7; break;
- }
-
- // Print the CR bit number. The Code is ((BI << 5) | BO) for a
- // BCC, but we must have the positive form here (BO == 12)
- unsigned BI = Code >> 5;
- assert((Code & 0xF) == 12 &&
- "BO in predicate bit must have the positive form");
-
- unsigned Value = 4*RegNo + BI;
- O << Value;
- return;
- }
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- case PPC::PRED_ALWAYS: return; // Don't print anything for always.
case PPC::PRED_LT: O << "lt"; return;
case PPC::PRED_LE: O << "le"; return;
case PPC::PRED_EQ: O << "eq"; return;
@@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
assert(StringRef(Modifier) == "reg" &&
"Need to specify 'cc' or 'reg' as predicate op modifier!");
- // Don't print the register for 'always'.
- if (Code == PPC::PRED_ALWAYS) return;
printOperand(MI, OpNo+1, O);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 87ecb13a4c76..ec2657403e0c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
@@ -30,11 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case PPC::fixup_ppc_toc:
+ case PPC::fixup_ppc_tlsreg:
+ case PPC::fixup_ppc_nofixup:
return Value;
- case PPC::fixup_ppc_lo14:
- case PPC::fixup_ppc_toc16_ds:
- return (Value & 0xffff) << 2;
case PPC::fixup_ppc_brcond14:
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
@@ -46,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_ha16:
return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
return Value & 0xffff;
+ case PPC::fixup_ppc_lo16_ds:
+ return Value & 0xfffc;
}
}
@@ -61,7 +60,9 @@ public:
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {}
+ MCValue Target, uint64_t &FixedValue) {
+ llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+ }
};
class PPCAsmBackend : public MCAsmBackend {
@@ -78,10 +79,9 @@ public:
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_lo16", 16, 16, 0 },
{ "fixup_ppc_ha16", 16, 16, 0 },
- { "fixup_ppc_lo14", 16, 14, 0 },
- { "fixup_ppc_toc", 0, 64, 0 },
- { "fixup_ppc_toc16", 16, 16, 0 },
- { "fixup_ppc_toc16_ds", 16, 14, 0 }
+ { "fixup_ppc_lo16_ds", 16, 14, 0 },
+ { "fixup_ppc_tlsreg", 0, 0, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -92,6 +92,20 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
bool mayNeedRelaxation(const MCInst &Inst) const {
// FIXME.
return false;
@@ -99,7 +113,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
@@ -135,11 +149,6 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- llvm_unreachable("UNIMP");
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
return createMachObjectWriter(new PPCMachObjectWriter(
@@ -161,19 +170,6 @@ namespace {
ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
PPCAsmBackend(T), OSABI(OSABI) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- Value = adjustFixupValue(Fixup.getKind(), Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
- }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
deleted file mode 100644
index 9c975c089ea6..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the PPC target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCBASEINFO_H
-#define PPCBASEINFO_H
-
-#include "PPCMCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
- using namespace PPC;
- switch (RegEnum) {
- case 0: return 0;
- case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
- case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
- case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
- case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
- case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
- case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
- case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
- case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
- case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
- case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
- case R10: case X10: case F10: case V10: case CR2EQ: return 10;
- case R11: case X11: case F11: case V11: case CR2UN: return 11;
- case R12: case X12: case F12: case V12: case CR3LT: return 12;
- case R13: case X13: case F13: case V13: case CR3GT: return 13;
- case R14: case X14: case F14: case V14: case CR3EQ: return 14;
- case R15: case X15: case F15: case V15: case CR3UN: return 15;
- case R16: case X16: case F16: case V16: case CR4LT: return 16;
- case R17: case X17: case F17: case V17: case CR4GT: return 17;
- case R18: case X18: case F18: case V18: case CR4EQ: return 18;
- case R19: case X19: case F19: case V19: case CR4UN: return 19;
- case R20: case X20: case F20: case V20: case CR5LT: return 20;
- case R21: case X21: case F21: case V21: case CR5GT: return 21;
- case R22: case X22: case F22: case V22: case CR5EQ: return 22;
- case R23: case X23: case F23: case V23: case CR5UN: return 23;
- case R24: case X24: case F24: case V24: case CR6LT: return 24;
- case R25: case X25: case F25: case V25: case CR6GT: return 25;
- case R26: case X26: case F26: case V26: case CR6EQ: return 26;
- case R27: case X27: case F27: case V27: case CR6UN: return 27;
- case R28: case X28: case F28: case V28: case CR7LT: return 28;
- case R29: case X29: case F29: case V29: case CR7GT: return 29;
- case R30: case X30: case F30: case V30: case CR7EQ: return 30;
- case R31: case X31: case F31: case V31: case CR7UN: return 31;
- default:
- llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
- }
-}
-
-} // end namespace llvm;
-
-#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index dc93f7124a52..84e4175e635b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -33,9 +34,25 @@ namespace {
const MCFixup &Fixup,
bool IsPCRel) const;
virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+
+ class PPCELFRelocationEntry : public ELFRelocationEntry {
+ public:
+ PPCELFRelocationEntry(const ELFRelocationEntry &RE);
+ bool operator<(const PPCELFRelocationEntry &RE) const {
+ return (RE.r_offset < r_offset ||
+ (RE.r_offset == r_offset && RE.Type > Type));
+ }
};
}
+PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
+ : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
+ RE.r_addend, *RE.Fixup) {}
+
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
@@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case PPC::fixup_ppc_br24:
Type = ELF::R_PPC_REL24;
break;
+ case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
}
} else {
switch ((unsigned)Fixup.getKind()) {
@@ -79,9 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
Type = ELF::R_PPC_TPREL16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
}
break;
case PPC::fixup_ppc_lo16:
@@ -90,22 +127,56 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
Type = ELF::R_PPC_TPREL16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
}
break;
- case PPC::fixup_ppc_lo14:
- Type = ELF::R_PPC_ADDR14;
- break;
- case PPC::fixup_ppc_toc:
- Type = ELF::R_PPC64_TOC;
+ case PPC::fixup_ppc_lo16_ds:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ }
break;
- case PPC::fixup_ppc_toc16:
- Type = ELF::R_PPC64_TOC16;
+ case PPC::fixup_ppc_tlsreg:
+ Type = ELF::R_PPC64_TLS;
break;
- case PPC::fixup_ppc_toc16_ds:
- Type = ELF::R_PPC64_TOC16_DS;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ Type = ELF::R_PPC64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ Type = ELF::R_PPC64_TLSLD;
+ break;
+ }
break;
case FK_Data_8:
switch (Modifier) {
@@ -162,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
switch ((unsigned)Fixup.getKind()) {
case PPC::fixup_ppc_ha16:
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
- case PPC::fixup_ppc_toc16_ds:
+ case PPC::fixup_ppc_lo16_ds:
RelocOffset += 2;
break;
default:
@@ -171,6 +241,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
}
}
+// The standard sorter only sorts on the r_offset field, but PowerPC can
+// have multiple relocations at the same offset. Sort secondarily on the
+// relocation type to avoid nondeterminism.
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+
+ // Copy to a temporary vector of relocation entries having a different
+ // sort function.
+ std::vector<PPCELFRelocationEntry> TmpRelocs;
+
+ for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
+ R != Relocs.end(); ++R) {
+ TmpRelocs.push_back(PPCELFRelocationEntry(*R));
+ }
+
+ // Sort in place by ascending r_offset and descending r_type.
+ array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
+
+ // Copy back to the original vector.
+ unsigned I = 0;
+ for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
+ R != TmpRelocs.end(); ++R, ++I) {
+ Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
+ R->Symbol, R->r_addend, *R->Fixup);
+ }
+}
+
+
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 37b265e7fd38..86c44f57a5e2 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
@@ -31,19 +33,16 @@ enum Fixups {
/// like 'lis'.
fixup_ppc_ha16,
- /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
- /// like 'std'.
- fixup_ppc_lo14,
-
- /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
- fixup_ppc_toc,
+ /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_lo16_ds,
- /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
- fixup_ppc_toc16,
+ /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+ fixup_ppc_tlsreg,
- /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
- /// implied 2 zero bits
- fixup_ppc_toc16_ds,
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models.
+ fixup_ppc_nofixup,
// Marker
LastTargetFixupKind,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 215aa40c4afd..a25d7fe64f3a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -17,8 +17,9 @@ using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
PCSymbol = ".";
@@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
// ".comm align is in bytes but .align is pow-2."
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 21183024a509..2223cd623cb5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,15 +12,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -31,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
Triple TT;
public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : STI(sti), TT(STI.getTargetTriple()) {
+ : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
}
~PPCMCCodeEmitter() {}
- bool is64BitMode() const {
- return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
- }
-
- bool isSVR4ABI() const {
- return TT.isMacOSX() == 0;
- }
-
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -61,6 +56,8 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
@@ -77,11 +74,11 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
- // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the
- // following 'nop'.
+ // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF)
+ if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
+ Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
Size = 8;
// Output the constant in big endian byte order.
@@ -114,6 +111,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
+
+ // For special TLS calls, add another fixup for the symbol. Apparently
+ // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
+ // similar that TblGen will not generate a separate case for the latter
+ // two, so this is the only way to get the extra fixup generated.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
+ const MCOperand &MO2 = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ }
return 0;
}
@@ -162,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
return RegBits;
}
@@ -183,17 +187,26 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
- // Add a fixup for the branch target.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16_ds));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo14));
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16_ds));
return RegBits;
}
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this statement is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_tlsreg));
+ return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
+}
+
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -202,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
MI.getOpcode() == PPC::MFOCRF ||
MI.getOpcode() == PPC::MTCRF8) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
@@ -214,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 4c2578d5dc53..2209f936ec33 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "PPCMCTargetDesc.h"
-#include "PPCMCAsmInfo.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "PPCMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
else
RM = Reloc::Static;
}
+ if (CM == CodeModel::Default) {
+ Triple T(TT);
+ if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+ CM = CodeModel::Medium;
+ }
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index a0e4cf3005f2..38a7420d972d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,6 +14,9 @@
#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -44,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a143406..d84eb9c6aa03 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
switch (Opcode) {
- default: llvm_unreachable("Unknown PPC branch opcode!");
case PPC::PRED_EQ: return PPC::PRED_NE;
case PPC::PRED_NE: return PPC::PRED_EQ;
case PPC::PRED_LT: return PPC::PRED_GE;
@@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_NU: return PPC::PRED_UN;
case PPC::PRED_UN: return PPC::PRED_NU;
}
+ llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index f872e861bfa7..ad2b01812816 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,11 +14,17 @@
#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
enum Predicate {
- PRED_ALWAYS = (0 << 5) | 20,
PRED_LT = (0 << 5) | 12,
PRED_LE = (1 << 5) | 4,
PRED_EQ = (2 << 5) | 12,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 9103e1232505..446b6854fb5b 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
-#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -25,6 +24,7 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
+ class ImmutablePass;
class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
@@ -37,6 +37,9 @@ namespace llvm {
JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
+
+ /// \brief Creates an PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
namespace PPCII {
@@ -53,25 +56,32 @@ namespace llvm {
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 4,
+ MO_PIC_FLAG = 2,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 8,
+ MO_NLP_FLAG = 4,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 16,
+ MO_NLP_HIDDEN_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 224,
+ MO_ACCESS_MASK = 0xf0,
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 32, MO_HA16 = 64,
+ MO_LO16 = 1 << 4,
+ MO_HA16 = 2 << 4,
+
+ MO_TPREL16_HA = 3 << 4,
+ MO_TPREL16_LO = 4 << 4,
- MO_TPREL16_HA = 96,
- MO_TPREL16_LO = 128
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL16_LO = 5 << 4,
+ MO_TLSLD16_LO = 6 << 4,
+ MO_TOC16_LO = 7 << 4
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index cb15dadb7e99..389216278ee4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E500mc", "">;
def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
@@ -52,12 +57,42 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// POPCNTB p5 through p7 popcntb and related instructions
+// VSX p7 vector-scalar instruction set
//===----------------------------------------------------------------------===//
// Register File Description
@@ -73,30 +108,46 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
-def : Processor<"603", G3Itineraries, [Directive603]>;
-def : Processor<"603e", G3Itineraries, [Directive603]>;
-def : Processor<"603ev", G3Itineraries, [Directive603]>;
-def : Processor<"604", G3Itineraries, [Directive604]>;
-def : Processor<"604e", G3Itineraries, [Directive604]>;
-def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"750", G4Itineraries, [Directive750]>;
-def : Processor<"g3", G3Itineraries, [Directive750]>;
-def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"970", G5Itineraries,
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"g5", G5Itineraries,
+def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
Feature64Bit /*, Feature64BitRegs */]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
@@ -104,23 +155,65 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureMFOCRF, FeatureFSqrt,
- FeatureSTFIWX, FeatureISEL,
- Feature64Bit
- /*, Feature64BitRegs */]>;
-def : Processor<"pwr6", G5Itineraries,
+def : ProcessorModel<"a2", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX]>;
+def : ProcessorModel<"pwr3", G5Model,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr4", G5Model,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5", G5Model,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"pwr7", G5Itineraries,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+// pwr6x carries its own directive (DirectivePwr6x) so that it is not
+// reported as pwr5x; its feature list is unchanged.
+def : ProcessorModel<"pwr6x", G5Model,
+ [DirectivePwr6x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr7", G5Model,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
-def : Processor<"ppc64", G5Itineraries,
+def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 15d690bd8970..96a9f0a39006 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,14 +18,13 @@
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
-#include "PPCTargetMachine.h"
-#include "PPCSubtarget.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -33,28 +32,30 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
@@ -72,6 +73,7 @@ namespace {
return "PowerPC Assembly Printer";
}
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
virtual void EmitInstruction(const MachineInstr *MI);
@@ -309,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
}
+/// lookUpOrCreateTOCEntry -- Return the TOC-entry symbol stored in TOC[Sym].
+/// If no symbol is stored there yet, create a temporary "C" symbol, store it
+/// in TOC[Sym], and return it.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+
+ MCSymbol *&TOCEntry = TOC[Sym];
+
+ // Probe names until one is free. NOTE(review): probe uses TOCLabelID pre-increment, GetTempSymbol post-increment.
+ while (TOCEntry == 0) {
+ if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ "C" + Twine(TOCLabelID++)) == 0) {
+ TOCEntry = GetTempSymbol("C", TOCLabelID);
+ }
+ }
+
+ return TOCEntry;
+}
+
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
@@ -349,14 +370,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
-
-
- // FIXME: We would like an efficient form for this, so we don't have to do
- // a lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
- Create(PICBase, OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
@@ -382,14 +399,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- MCSymbol *&TOCEntry = TOC[MOSymbol];
- // To avoid name clash check if the name already exists.
- while (TOCEntry == 0) {
- if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == 0) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
@@ -399,15 +410,299 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case PPC::ADDIStocHA: {
+ // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDIS8. If the global address is external,
+ // has common or available_externally linkage, is a function address,
+ // or is a jump table address, then generate a TOC entry and reference
+ // that. Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::ADDIS8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ "Invalid operand for ADDIStocHA!");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+ bool IsCommon = false;
+ bool IsAvailExt = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsCommon = GVar && RealGValue->hasCommonLinkage();
+ IsFunction = !GVar;
+ IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::LDtocL: {
+ // Transform %Xd = LDtocL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD. If the global address is external, has
+ // common or available_externally linkage, is a function address, or
+ // is a jump table address, then reference the associated TOC entry.
+ // Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
+ MCSymbol *MOSymbol = 0;
+
+ if (MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDItocL: {
+ // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDI8. If the global address is external or is
+ // a function address, then generate a TOC entry and reference that.
+ // Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::ADDI8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsFunction = !GVar;
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+
+ if (IsFunction || IsExternal)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL: {
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR: {
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL: {
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR: {
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDISdtprelHA: {
+ // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X3)
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL: {
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
+ .addReg(MI->getOperand(0).getReg()));
return;
case PPC::SYNC:
// In Book E sync is called msync, handle this special case here...
@@ -438,14 +733,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOC, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
+ OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -474,6 +769,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -508,7 +822,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppcA2",
"ppce500mc",
"ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
"power6",
+ "power6x",
"power7",
"ppc64"
};
@@ -523,8 +842,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
// FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
+ assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+ }
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -549,16 +871,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
- TmpStr += "$lazy_ptr";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
- TmpStr += "$tmp";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
}
void PPCDarwinAsmPrinter::
@@ -589,32 +908,51 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- // FIXME: MCize this.
- OutStreamer.EmitRawText(StringRef("\tmflr r0"));
- OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+
+ const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+
+ // mflr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ // bcl 20, 31, AnonSymbol
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
OutStreamer.EmitLabel(AnonSymbol);
- OutStreamer.EmitRawText(StringRef("\tmflr r11"));
- OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
- "-" + AnonSymbol->getName() + ")");
- OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
-
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
-
+ // mflr r11
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
+ Anon, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(Sub));
+ // mtlr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(Sub).addExpr(Sub)
+ .addReg(PPC::R11));
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
return;
@@ -634,23 +972,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
EmitAlignment(4);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ const MCExpr *LazyPtrLo16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
+ OutContext);
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
@@ -703,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -713,7 +1070,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -732,7 +1089,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 21a0fb200f20..bd1c37868110 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -17,21 +17,27 @@
#define DEBUG_TYPE "ppc-branch-select"
#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -45,6 +51,9 @@ namespace {
char PPCBSel::ID = 0;
}
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
/// Pass
///
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2a2abb171fb1..81a54d7015b0 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -31,20 +31,20 @@
#define DEBUG_TYPE "ctrloops"
#include "PPC.h"
-#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -54,6 +54,10 @@ using namespace llvm;
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct PPCCTRLoops : public MachineFunctionPass {
@@ -64,7 +68,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- PPCCTRLoops() : MachineFunctionPass(ID) {}
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -174,15 +180,32 @@ namespace {
};
} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
/// isCompareEquals - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -341,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -368,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ int64_t start = DefInstr->getOperand(2).getImm();
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -387,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -405,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -477,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -582,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -635,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
(unsigned) PPC::EXTSW_32_64 :
(unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
@@ -652,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 3f87e883b1e4..c8a29a3d2cfe 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
// Vector types are always returned in V2.
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
@@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[
//===----------------------------------------------------------------------===//
-// PowerPC Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-/*
-def CC_PPC : CallingConv<[
- // The first 8 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
- CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
-
- // Common sub-targets passes FP values in F1 - F13
- CCIfType<[f32, f64],
- CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
-
- // The first 12 Vector arguments are passed in altivec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
-
-/*
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>*/
-]>;
-
-*/
-
-//===----------------------------------------------------------------------===//
-// PowerPC System V Release 4 ABI
+// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
-def CC_PPC_SVR4_Common : CallingConv<[
+def CC_PPC32_SVR4_Common : CallingConv<[
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
- CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
// Make sure the i64 words from a long double are either both passed in
// registers or both passed on the stack.
- CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
// FP values are passed in F1 - F8.
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
@@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
-def CC_PPC_SVR4_VarArg : CallingConv<[
- CCDelegateTo<CC_PPC_SVR4_Common>
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
-// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
-// vector arguments in vector registers before putting them on the stack.
-def CC_PPC_SVR4 : CallingConv<[
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCDelegateTo<CC_PPC_SVR4_Common>
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// Helper "calling convention" to handle aggregate by value arguments.
@@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[
// Still, the address of the aggregate copy in the callers stack frame is passed
// in a GPR (or in the parameter list area if all GPRs are allocated) from the
// caller to the callee. The location for the address argument is assigned by
-// the CC_PPC_SVR4 calling convention.
+// the CC_PPC32_SVR4 calling convention.
//
-// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
-def CC_PPC_SVR4_ByVal : CallingConv<[
+def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
- CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
]>;
def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
@@ -164,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
F27, F28, F29, F30, F31, CR2, CR3, CR4,
V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
+def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 252a2d159ec3..64787185138b 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -12,15 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCTargetMachine.h"
-#include "PPCRelocations.h"
#include "PPC.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -68,6 +68,7 @@ namespace {
unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -141,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
@@ -243,6 +244,13 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
}
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
@@ -252,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index caf7bf2be793..3244b904ee64 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "PPCFrameLowering.h"
-#include "PPCInstrInfo.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -115,16 +116,25 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineRegisterInfo::livein_iterator
I = MF->getRegInfo().livein_begin(),
E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(I->first);
+ unsigned RegNo = TRI->getEncodingValue(I->first);
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(*I);
- if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
}
// If no registers are used, turn this into a copy.
@@ -179,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -198,13 +226,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
- bool DisableRedZone = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoRedZone);
- // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
- // still generate stackless code if all local vars are reg-allocated.
- // Try: (FrameSize <= 224
- // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
+ bool DisableRedZone = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -213,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -231,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -240,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
// hasFP - Return true if the specified function actually has a dedicated frame
@@ -261,7 +295,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -270,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
@@ -300,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get operating system
@@ -769,14 +828,15 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MF.getRegInfo().setPhysRegUnused(LR);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -801,29 +861,24 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -831,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -895,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -914,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
}
// Check whether the frame pointer register is allocated. If so, make sure it
@@ -948,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinReg =
- std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
- getPPCRegisterNumbering(MinG8R));
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
if (Subtarget.isPPC64()) {
LowerBound -= (31 - MinReg + 1) * 8;
@@ -1009,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF))
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
}
bool
@@ -1046,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// save slot via GPR12 (available in the prolog for 32- and 64-bit).
if (Subtarget.isPPC64()) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
.addReg(PPC::X12,
getKillRegState(true))
.addImm(8)
@@ -1087,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (isPPC64) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
.addImm(8)
.addReg(PPC::X1));
RestoreOp = PPC::MTCRF8;
@@ -1103,15 +1198,56 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
if (CR3Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
if (CR4Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(true)));
+}
+
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
}
bool
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 4d957b91c7bb..6f5f9368c6c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,9 +15,9 @@
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
namespace llvm {
class PPCSubtarget;
@@ -27,11 +27,14 @@ class PPCFrameLowering: public TargetFrameLowering {
public:
PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -40,16 +43,23 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -139,6 +149,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 6ed1fb9e6a3c..4bf1e3396429 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) {
}
// Do not allow MTCTR and BCTRL to be in the same dispatch group.
- if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ if (HasCTRSet && Opcode == PPC::BCTRL)
return NoopHazard;
// If this is a load following a store, make sure it's not to the same or
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 254fea67fc4e..95efc11b53c1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,24 +14,30 @@
#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
-#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
@@ -46,7 +52,9 @@ namespace {
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {}
+ PPCSubTarget(*TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
@@ -59,6 +67,8 @@ namespace {
return true;
}
+ virtual void PostprocessISelDAG();
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -110,10 +120,10 @@ namespace {
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
- /// immediate field. Because preinc imms have already been validated, just
- /// accept it.
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ if (N.getOpcode() == ISD::TargetConstant ||
N.getOpcode() == ISD::TargetGlobalAddress) {
Out = N;
return true;
@@ -122,18 +132,6 @@ namespace {
return false;
}
- /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
- /// index field. Because preinc imms have already been validated, just
- /// accept it.
- bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
- N.getOpcode() == ISD::TargetGlobalAddress)
- return false;
-
- Out = N;
- return true;
- }
-
/// SelectAddrIdx - Given the specified addressed, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which are preferred.
@@ -154,6 +152,12 @@ namespace {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -1040,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
SDValue Offset = LD->getOffset();
- if (isa<ConstantSDNode>(Offset) ||
+ if (Offset.getOpcode() == ISD::TargetConstant ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
unsigned Opcode;
@@ -1107,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[] = { Offset, Base, Chain };
+ SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
@@ -1268,11 +1272,277 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
+ case PPCISD::TOC_ENTRY: {
+ assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ break;
+
+ // The first source operand is a TargetGlobalAddress or a
+ // TargetJumpTable. If it is an externally defined symbol, a symbol
+ // with common linkage, a function address, or a jump table address,
+ // or if we are generating code for large code model, we generate:
+ // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+ SDValue GA = N->getOperand(0);
+ SDValue TOCbase = N->getOperand(1);
+ SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+ TOCbase, GA);
+
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GValue = G->getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ assert((GVar || isa<Function>(RealGValue)) &&
+ "Unexpected global value subclass!");
+
+ // An external variable is one without an initializer. For these,
+ // for variables with common or available_externally linkage, and
+ // for Functions, generate the LDtocL form.
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+ }
+
+ return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+ SDValue(Tmp, 0), GA);
+ }
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt/2
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
}
return SelectCode(N);
}
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load or store instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load or store, though,
+ // can receive relocation from various flavors of ADDIxxx, so we
+ // need to carry the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
/// createPPCISelDag - This pass converts a legalized DAG into a
@@ -1282,3 +1552,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
return new PPCDAGToDAGISel(TM);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
+ false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index adf78d5233ae..16fc8a0e3726 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12,15 +12,10 @@
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,6 +24,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -36,20 +36,20 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
@@ -112,6 +116,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -132,11 +137,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
@@ -144,26 +151,58 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT()) {
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- }
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+ // frin does not implement "ties to even." Thus, this is safe only in
+ // fast-math mode.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+ // These need to set FE_INEXACT, and use a custom inserter.
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ }
+ }
+
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ if (Subtarget->hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
@@ -206,6 +245,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: The EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended to support
+ // SjLj exception handling; it is a light-weight setjmp/longjmp replacement
+ // for continuations, user-level threading, etc. As a result, no other SjLj
+ // exception interfaces are implemented; please don't build your own
+ // exception handling on top of them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -285,15 +332,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (PPCSubTarget.hasFPCVT()) {
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
@@ -347,6 +407,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
@@ -361,6 +436,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -373,12 +449,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
- for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- setOperationAction(ISD::FSQRT, VT, Expand);
- }
-
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -393,6 +463,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
@@ -401,6 +475,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -429,6 +509,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -449,6 +531,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -482,15 +570,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
- maxStoresPerMemset = 32;
- maxStoresPerMemsetOptSize = 16;
- maxStoresPerMemcpy = 32;
- maxStoresPerMemcpyOptSize = 8;
- maxStoresPerMemmove = 32;
- maxStoresPerMemmoveOptSize = 8;
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- benefitFromCodePlacementOpt = true;
}
}
@@ -521,6 +608,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
@@ -536,16 +625,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
- case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
- case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
- case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
- case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
- case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -555,13 +641,25 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
- case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
+ case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
+ case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
}
}
@@ -995,7 +1093,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1044,7 +1142,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1107,7 +1205,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1145,15 +1243,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
+ bool isLoad = true;
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
} else
return false;
@@ -1161,7 +1263,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
AM = ISD::PRE_INC;
return true;
}
@@ -1172,6 +1292,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
@@ -1308,19 +1432,81 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (!is64bit)
+ llvm_unreachable("only local-exec is currently supported for ppc32");
+
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, TPOffsetHi);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ }
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
- return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ llvm_unreachable("Unknown TLS model!");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -1654,18 +1840,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1688,11 +1874,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1815,7 +2001,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1876,7 +2062,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
@@ -2068,13 +2254,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -2409,6 +2598,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
@@ -2995,7 +3187,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+ unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
@@ -3128,8 +3320,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
- CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
@@ -3231,7 +3426,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
- // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
@@ -3247,17 +3442,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// Emit tail call.
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
@@ -3279,7 +3463,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- if (CallOpc == PPCISD::BCTRL_SVR4) {
+ if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -3290,9 +3474,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
// Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP_SVR4;
+ CallOpc = PPCISD::CALL_NOP;
}
}
@@ -3401,11 +3585,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
bool Result;
if (Outs[i].IsFixed) {
- Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
- CCInfo);
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
} else {
- Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo);
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
}
if (Result) {
@@ -3418,7 +3602,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
@@ -3429,7 +3613,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
@@ -4323,14 +4507,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -4355,12 +4533,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -4466,6 +4649,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4553,37 +4751,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- PPCISD::FCTIDZ,
+ (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
break;
}
// Convert the FP value to an int value through memory.
- SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+ bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
- if (Op.getValueType() == MVT::i32)
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
false, false, false, 0);
}
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
// When converting to single-precision, we actually need to convert
@@ -4597,6 +4830,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
+ !PPCSubTarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4630,44 +4864,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
+
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
- "Unhandled SINT_TO_FP type in custom expander!");
+ "Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, 2, MVT::i32, MMO);
+ } else {
+ assert(PPCSubTarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
Op.getOperand(0));
- // STD the extended value into the stack slot.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 8, 8);
- SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
- SDValue Store =
- DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
- Ops, 4, MVT::i64, MMO);
- // Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, false, 0);
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
// FCFID it and return it.
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -4697,12 +4956,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<EVT> NodeTys;
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
- NodeTys.push_back(MVT::f64); // return register
- NodeTys.push_back(MVT::Glue); // unused in this context
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
@@ -4936,11 +5196,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5036,23 +5306,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
@@ -5326,9 +5579,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<EVT> VTs;
- VTs.push_back(Op.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
@@ -5470,11 +5721,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Lower 64-bit shifts.
@@ -5528,50 +5783,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(1));
- // This sequence changes FPSCR to do round-to-zero, adds the two halves
- // of the long double, and puts FPSCR back the way it was. We do not
- // actually model FPSCR.
- std::vector<EVT> NodeTys;
- SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
- NodeTys.push_back(MVT::f64); // Return register
- NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
- Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
- MFFSreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(31, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(30, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64); // result of add
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = Lo;
- Ops[1] = Hi;
- Ops[2] = InFlag;
- Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
- FPreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64);
- Ops[0] = DAG.getConstant(1, MVT::i32);
- Ops[1] = MFFSreg;
- Ops[2] = FPreg;
- Ops[3] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
- FPreg = Result.getValue(0);
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
// We know the low half is about to be thrown away, so just use something
// convenient.
@@ -5663,7 +5876,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5767,7 +5980,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -5782,9 +5995,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const { // Expands the EH_SjLj_SetJmp pseudo MI: splits MBB into thisMBB/mainMBB/sinkMBB, erases MI and returns sinkMBB.
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I; // I now points just past MBB; the new blocks are inserted there.
+
+ // Memory operands of MI; they are re-attached below to the stores into buf.
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg(); // operand 0: the result v
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC); // set to 0 in mainMBB
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC); // set to 1 in thisMBB, after the bl
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate (listed in the order emitted below)
+ //
+ // thisMBB: (on 64-bit SVR4, preceded by buf[TOCOffset] = X2)
+ // bl mainMBB
+ // v_restore = 1
+ // SjLjSetup mainMBB
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of MBB (everything after MI) and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize(); // second slot: jump address
+ const int64_t TOCOffset = 3 * PVT.getStoreSize(); // fourth slot: TOC pointer
+
+ // LabelReg receives the jump address (read from LR in mainMBB) before it is stored.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg(); // operand 1: base register addressing buf
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { // save X2 (TOC pointer) into buf
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4) // NOTE(review): byte offset is divided by 4 for STD but passed unscaled to STW below -- presumably STD's displacement operand is in words here; confirm
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup: the "bl mainMBB" of the diagram, tagged with the no-preserved-registers mask.
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); // v_restore = 1
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // LabelReg = LR
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP: buf[LabelOffset] = LabelReg
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); // v_main = 0
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB: v = phi(v_main from mainMBB, v_restore from thisMBB)
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const { // Expands the EH_SjLj_LongJmp pseudo MI in place: reloads FP, the jump address, SP (and X2 on 64-bit SVR4) from the buffer, then branches via CTR; erases MI and returns MBB.
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory operands of MI; they are re-attached below to each load from the buffer.
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC); // holds the jump address until it is moved to CTR
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize(); // slot 1: jump address
+ const int64_t SPOffset = 2 * PVT.getStoreSize(); // slot 2: stack pointer
+ const int64_t TOCOffset = 3 * PVT.getStoreSize(); // slot 3: TOC pointer
+
+ unsigned BufReg = MI->getOperand(0).getReg(); // operand 0: base register addressing the buffer
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0) // slot 0
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP (the jump address) into Tmp
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4) // NOTE(review): byte offset is divided by 4 for LD but passed unscaled to LWZ -- presumably LD's displacement operand is in words here; confirm
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC (X2); only done for 64-bit SVR4
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump: move the reloaded address into CTR and branch through it.
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -5802,24 +6244,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
- // The SelectPred is ((BI << 5) | BO) for a BCC
- unsigned BO = SelectPred & 0xF;
- assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
-
- unsigned TrueOpNo, FalseOpNo;
- if (BO == 12) {
- TrueOpNo = 2;
- FalseOpNo = 3;
- } else {
- TrueOpNo = 3;
- FalseOpNo = 2;
- SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
}
BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(TrueOpNo).getReg())
- .addReg(MI->getOperand(FalseOpNo).getReg())
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
+ .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
+ .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6052,7 +6494,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6155,6 +6597,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
.addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::FRINDrint ||
+ MI->getOpcode() == PPC::FRINSrint) {
+ bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ // Perform the rounding.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+ .addReg(Src);
+
+ // Compare the results.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+ .addReg(Dest).addReg(Src);
+
+ // If the results were not equal, then set the FPSCR XX bit.
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+ BB->addSuccessor(midMBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+
+ // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+ // the FI bit here because that will not automatically set XX also,
+ // and XX is what libm interprets as the FE_INEXACT flag.
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+ BB->addSuccessor(exitMBB);
+
+ BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6167,6 +6678,139 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+ DAGCombinerInfo &DCI) const { // Builds an approximation of 1/Op: a PPCISD::FRE estimate refined by Newton iterations. Returns an empty SDValue when no estimate is generated.
+ if (DCI.isAfterLegalizeVectorOps()) // bail out once vector-op legalization has run
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = 1/X - A [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits (binary digits of fraction).
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; // 14 -> 28 digits in one step; 5 -> 10 -> 20 -> 40 in three
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations; // one more doubling to cover the 52 digits of f64
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, VT.getScalarType());
+ if (VT.isVector()) { // splat the scalar 1.0 across all four lanes
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); // initial estimate X_0
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); // Arg * Est
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); // 1 - Arg * Est
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); // Est (1 - Arg * Est)
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); // Est + Est (1 - Arg * Est)
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue(); // type/subtarget combination not handled
+}
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+ DAGCombinerInfo &DCI) const { // Builds an approximation of 1/sqrt(Op): a PPCISD::FRSQRTE estimate refined by Newton iterations. Returns an empty SDValue when no estimate is generated.
+ if (DCI.isAfterLegalizeVectorOps()) // bail out once vector-op legalization has run
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits (binary digits of fraction).
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; // 14 -> 28 digits in one step; 5 -> 10 -> 20 -> 40 in three
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations; // one more doubling to cover the 52 digits of f64
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPThreeHalves =
+ DAG.getConstantFP(1.5, VT.getScalarType());
+ if (VT.isVector()) { // splat the scalar 1.5 across all four lanes
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPThreeHalves, FPThreeHalves,
+ FPThreeHalves, FPThreeHalves);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); // initial estimate X_0
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); // 1.5 * Arg
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); // 1.5 * Arg - Arg
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); // Est * Est
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); // HalfArg * Est * Est
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); // 1.5 - HalfArg * Est * Est
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); // Est * (1.5 - HalfArg * Est * Est)
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue(); // type/subtarget combination not handled
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -6193,7 +6837,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV);
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV,
+ N->getOperand(1).getOperand(1));
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+
+ SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV, DCI);
+ if (RV.getNode() != 0)
+ return RV;
+ }
+ }
+ break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -6240,8 +6949,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
- N->getOperand(2), N->getOperand(3));
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -6251,7 +6967,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
- N->getOperand(1).getValueType() == MVT::i16)) {
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@@ -6272,7 +6991,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
@@ -6283,8 +7005,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
- DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
- LD->getMemoryVT(), LD->getMemOperand());
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -6384,14 +7107,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<EVT> VTs;
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, MVT::i32)
};
- VTs.push_back(LHS.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
@@ -6543,6 +7264,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6727,13 +7451,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
@@ -6754,16 +7481,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// lowering. If DstAlign is zero that means the destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
@@ -6773,6 +7499,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const { // Returns true if VT may be accessed at an unaligned address; on that path *Fast (when Fast is non-null) is set to true.
+ if (DisablePPCUnaligned) // NOTE(review): declared outside this hunk -- presumably a command-line override; confirm
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
+
+ if (!VT.isSimple()) // non-simple (extended) types are rejected
+ return false;
+
+ if (VT.getSimpleVT().isVector()) // vector types are rejected
+ return false;
+
+ if (VT == MVT::ppcf128) // ppcf128 is rejected as well
+ return false;
+
+ if (Fast) // Fast is optional and is left untouched on every rejecting path
+ *Fast = true;
+
+ return true;
+}
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b3c7f9c28d40..7157b70d8622 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,9 +16,10 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace PPCISD {
@@ -35,14 +36,21 @@ namespace llvm {
/// was temporarily in the f64 operand.
FCFID,
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
/// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
/// operand, producing an f64 value containing the integer representation
/// of that FP value.
FCTIDZ, FCTIWZ,
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
// three v4f32 operands and producing a v4f32 result.
@@ -90,17 +98,10 @@ namespace llvm {
/// code.
SRL, SRA, SHL,
- /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
- /// registers.
- EXTSW_32,
-
/// CALL - A direct function call.
- /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
- CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
-
- /// NOP - Special NOP which follows 64-bit SVR4 calls.
- NOP,
+ CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
@@ -108,7 +109,7 @@ namespace llvm {
/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
/// BCTRL instruction.
- BCTRL_Darwin, BCTRL_SVR4,
+ BCTRL,
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -119,6 +120,12 @@ namespace llvm {
/// are undefined.
MFCR,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
@@ -138,26 +145,13 @@ namespace llvm {
/// an optional input flag argument.
COND_BRANCH,
- // The following 5 instructions are used only as part of the
- // long double-to-int conversion sequence.
-
- /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
- /// register.
- MFFS,
-
- /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
- MTFSB0,
-
- /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
- MTFSB1,
-
- /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
- /// rounding towards zero. It has flags added so it won't move past the
- /// FPSCR-setting instructions.
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
FADDRTZ,
- /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
- MTFSF,
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
/// LARX = This corresponds to PPC l{w|d}arx instruction: load and
/// reserve indexed. This is used to implement atomic operations.
@@ -178,20 +172,111 @@ namespace llvm {
CR6SET,
CR6UNSET,
- /// STD_32 - This is the STD instruction for use with "32-bit" registers.
- STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym@got@tprel@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym@got@tprel@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsgd@ha.
+ ADDIS_TLSGD_HA,
+
+ /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsgd@l.
+ ADDI_TLSGD_L,
+
+ /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsgd).
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsld@ha.
+ ADDIS_TLSLD_HA,
+
+ /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsld@l.
+ ADDI_TLSLD_L,
+
+ /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsld).
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+ /// local-dynamic TLS model, produces an ADDIS8 instruction
+ /// that adds X3 to sym@dtprel@ha. The Chain operand is needed
+ /// to tie this in place following a copy to %X3 from the result
+ /// of a GET_TLSLD_ADDR.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@dtprel@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
- STBRX,
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
/// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
/// then puts it in the bottom bits of the GPRC. TYPE can be either i16
/// or i32.
- LBRX
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym@toc@ha.
+ ADDIS_TOC_HA,
+
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ LD_TOC_L,
+
+ /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+ /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+ /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ ADDI_TOC_L
};
}
@@ -241,6 +326,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -249,7 +335,7 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual EVT getSetCCResultType(EVT VT) const;
@@ -315,6 +401,12 @@ namespace llvm {
MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -358,18 +450,21 @@ namespace llvm {
/// lowering. If DstAlign is zero, that means the destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
@@ -415,7 +510,7 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -525,6 +620,12 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9711452ec46a..fa5b65f0ba2d 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,8 +30,12 @@ def symbolLo64 : Operand<i64> {
let EncoderMethod = "getLO16Encoding";
}
def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
+ let MIOperandInfo = (ops i64imm:$imm);
}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+}
+def tlsgd : Operand<i64> {}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -62,123 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
let Defs = [LR8] in
def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL8_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA8_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
- }
- let Uses = [CTR8, RM] in {
- def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
-// ELF 64 ABI Calls = Darwin ABI Calls
-// Used to define BL8_ELF and BLA8_ELF
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL8_ELF : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
- let isCodeGenOnly = 1 in
- def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
- def BLA8_ELF : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+ def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
- let isCodeGenOnly = 1 in
- def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins aaddr:$func),
"bla $func\n\tnop", BrB,
- [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
+ [(PPCcall_nop (i64 imm:$func))]>;
}
- let Uses = [X11, CTR8, RM] in {
- def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
}
}
// Calls
-def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
- (BL8_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
- (BL8_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_NOP_ELF tglobaladdr:$dst)>;
-
-def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
- (BL8_ELF texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
- (BL8_NOP_ELF texternalsym:$dst)>;
-
-def : Pat<(PPCnop),
- (NOP)>;
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set G8RC:$dst,
- (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
- [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
}
// Instructions to support atomic operations
def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr", LdStLDARX,
- [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+ [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
"stdcx. $rS, $dst", LdStSTDCX,
- [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -197,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
+let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
- isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
- let isReturn = 1 in {
- def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
- }
-
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
-}
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
@@ -223,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+}
+
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
@@ -232,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
- let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst">;
- def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst">;
- }
-}
-// 64-but CR instructions
+// 64-bit CR instructions
def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let isCodeGenOnly = 1 in
def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
"#MFCR8pseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -254,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -262,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
"mfspr $rT, 268", SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -279,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
- [(set G8RC:$result,
- (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
@@ -302,126 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
- [(set G8RC:$rD, immSExt16:$imm)]>;
+ [(set i64:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
- [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
// Logical ops with immediate.
def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+let isCodeGenOnly = 1 in
+def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB@tls", IntSimple,
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
PPC970_DGroup_Cracked;
def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+ [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+ [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
}
def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"neg $rT, $rA", IntSimple,
- [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+ [(set i64:$rT, (ineg i64:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
}
def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhd $rT, $rA, $rB", IntMulHW,
- [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhdu $rT, $rA, $rB", IntMulHWU,
- [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
"cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
@@ -434,54 +432,60 @@ def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"sld $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srd $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
let Defs = [CARRY] in {
def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srad $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
-/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
-def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntSimple,
- [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
"sradi $rA, $rS, $SH", IntRotateDI,
- [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
"cntlzd $rA, $rS", IntGeneral,
- [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// results in the high 32-bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divd $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divdu $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulld $rT, $rA, $rB", IntMulHD,
- [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let isCommutable = 1 in {
@@ -512,7 +516,7 @@ def RLWINM8 : MForm_2<21,
[]>;
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
} // End FXU Operations.
@@ -527,94 +531,96 @@ def ISEL8 : AForm_4<31, 15,
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
- ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLHAU,
- []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -624,25 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGISel::Select().
def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtoc",
- [(set G8RC:$rD,
- (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocJTI",
- [(set G8RC:$rD,
- (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocCPT",
- [(set G8RC:$rD,
- (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
let RST = 2, DS = 2 in
def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
- [(PPCload_toc G8RC:$reg)]>, isPPC64;
+ [(PPCload_toc i64:$reg)]>, isPPC64;
let RST = 2, DS = 10, RA = 1 in
def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
@@ -651,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
}
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
- [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
-
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -671,118 +683,168 @@ def : Pat<(PPCload ixaddr:$src),
def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
+// Support for medium and large code model.
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDIStocHA",
+ [(set i64:$rD,
+ (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
+ isPPC64;
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
+ "#LDtocL",
+ [(set i64:$rD,
+ (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDItocL",
+ [(set i64:$rD,
+ (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISgotTprelHA",
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
+ "#LDgotTprelL",
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+ isPPC64;
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsgdHA",
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsgdL",
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsldHA",
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsldL",
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISdtprelHA",
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDIdtprelL",
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
- [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
-let PPC970_Unit = 2 in {
-
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
- [(set ptr_rc:$ea_res,
- (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
-
-// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
-def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
- "std $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
-def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
- "stdx $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
- PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
//===----------------------------------------------------------------------===//
@@ -793,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
"fcfid $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
"fctidz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfidu $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfids $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfidus $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiduz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwuz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
}
@@ -805,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
//
// Extensions and truncates to/from 32-bit regs.
-def : Pat<(i64 (zext GPRC:$in)),
- (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
0, 32)>;
-def : Pat<(i64 (anyext GPRC:$in)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
-def : Pat<(i32 (trunc G8RC:$in)),
- (EXTRACT_SUBREG G8RC:$in, sub_32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
@@ -838,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src),
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
// amounts.
-def : Pat<(sra G8RC:$rS, GPRC:$rB),
- (SRAD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(srl G8RC:$rS, GPRC:$rB),
- (SRD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(shl G8RC:$rS, GPRC:$rB),
- (SLD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
// SHL/SRL
-def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
- (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
-def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
// ROTL
-def : Pat<(rotl G8RC:$in, GPRC:$sh),
- (RLDCL G8RC:$in, GPRC:$sh, 0)>;
-def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, imm:$imm, 0)>;
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
@@ -866,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
- (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
- (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS8 G8RC:$in, tconstpool:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS8 G8RC:$in, tjumptable:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS8 G8RC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index ba58c3e4ac88..a5ba4c8aebef 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -158,34 +158,75 @@ def vecspltisw : PatLeaf<(build_vector), [{
return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
}], VSPLTISW_get_imm>;
-def V_immneg0 : PatLeaf<(build_vector), [{
- return PPC::isAllNegativeZeroVector(N);
-}]>;
-
//===----------------------------------------------------------------------===//
// Helpers for defining instructions that directly correspond to intrinsics.
-// VA1a_Int - A VAForm_1a intrinsic definition.
-class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
: VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
-// VX1_Int - A VXForm_1 intrinsic definition.
-class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
: VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
-// VX2_Int - A VXForm_2 intrinsic definition.
-class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// input doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
: VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
!strconcat(opc, " $vD, $vB"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -217,129 +258,136 @@ def DSTST64 : DSS_Form<374, (outs),
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
"dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+}
def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
"mfvscr $vD", LdStStore,
- [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
"mtvscr $vB", LdStLoad,
- [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
"lvebx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
"lvehx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
"lvewx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
"lvx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
"lvxl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
"lvsl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
"lvsr $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
"stvebx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
"stvehx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
"stvewx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
"stvx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
"stvxl $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
- (fneg VRRC:$vB))))]>;
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
+
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
-def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
-def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
-def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
-def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
-def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", VecFP,
- [(set VRRC:$vD,
- (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD,
+ (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddfp $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddubm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
-def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
-def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
-def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
-def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
-def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
-def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vand $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vandc $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
- (vnot_ppc VRRC:$vB)))]>;
+ [(set v4i32:$vD, (and v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfux $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctsxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctuxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -347,203 +395,237 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
let VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfsx $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
"vctuxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfux $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
"vctsxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
}
-def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
-def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
-
-def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
-def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
-def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
-def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
-def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
-def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
-
-def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
-def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
-def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
-def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
-def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
-def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
-def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
-def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
-def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
-def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
-def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
-def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
-def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
-def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghh $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghw $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglh $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglw $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
-
-def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
-def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
-def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
-def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
-def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
-def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
-
-def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
-def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
-def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
-def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
-def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
-def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
-def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
-def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+ [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
-def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
-def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
-def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
-def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
-def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
-def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
-def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubfp $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsububm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
-def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
-def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
-def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
-def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
-def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
-def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
-def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
-def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
-def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
-def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vnor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
- VRRC:$vB)))]>;
+ [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+ v4i32:$vB)))]>;
def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vxor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
-def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
-def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
-def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
-def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
-def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
-def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
-def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
-def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vsplth $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltw $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
-def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
-def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
-def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
-def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
-def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
-def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
-def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisb $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltish $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisw $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
// Vector Pack.
-def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
-def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
-def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
-def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
-def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuhum $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD,
- (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+ [(set v16i8:$vD,
+ (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuwum $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD,
- (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+ [(set v16i8:$vD,
+ (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
// Vector Unpack.
-def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
-def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
-def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
-def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
-def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
-def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
@@ -582,10 +664,16 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+let isCodeGenOnly = 1 in
def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
"vxor $vD, $vD, $vD", VecFP,
- [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+ [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+let IMM=-1 in {
+def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+ "vspltisw $vD, -1", VecFP,
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
+} // VALU Operations.
//===----------------------------------------------------------------------===//
// Additional Altivec Patterns
@@ -596,31 +684,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
// * 32-bit
-def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, $rA, $rB)>;
// * 64-bit
-def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
// Stores.
-def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
- (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
// Bit conversions.
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
@@ -642,82 +730,99 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
-def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
- (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
-def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUWUM VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
// Match vmrg*(x,x)
-def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLW VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
// Logical Operations
-def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
-def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
- (VNOR VRRC:$A, VRRC:$B)>;
-def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
- (VANDC VRRC:$A, VRRC:$B)>;
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+ (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+ (VANDC $A, $B)>;
-def : Pat<(fmul VRRC:$vA, VRRC:$vB),
- (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
+ (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
// Fused multiply add and multiply sub for packed float. These are represented
// separately from the real instructions above, for operations that must have
// the additional precision, such as Newton-Raphson (used by divide, sqrt)
-def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
-def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+ (VPERM $vA, $vB, $vC)>;
-def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
- (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
// Vector shifts
-def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
// Float to integer and integer to float conversions
-def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
- (VCTSXS_0 VRRC:$vA)>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
- (VCTUXS_0 VRRC:$vA)>;
-def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
- (VCFSX_0 VRRC:$vA)>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
- (VCFUX_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21fc..400b7e367bfe 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
let CR = 0;
}
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
// This is probably 1.7.9, but I don't have the reference that uses this
// numbering scheme...
class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- string cstr, InstrItinClass itin, list<dag>pattern>
+ InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
bits<5> rT;
bit RC = 0; // set by isDOT
let Pattern = pattern;
- let Constraints = cstr;
let Inst{6} = 0;
let Inst{7-14} = FM;
@@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
bits<5> RT;
bits<5> RA;
bits<5> RB;
- bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12).
- bits<3> CR;
+ bits<5> COND;
let Pattern = pattern;
let Inst{6-10} = RT;
let Inst{11-15} = RA;
let Inst{16-20} = RB;
- let Inst{21-23} = CR;
- let Inst{24-25} = BIBO{6-5};
+ let Inst{21-25} = COND;
let Inst{26-30} = xo;
let Inst{31} = 0;
}
@@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
let PPC64 = 0;
let Pattern = pattern;
let Inst{31-0} = 0;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d9d68446f536..69c54ed084be 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,12 +12,13 @@
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
+#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -28,16 +29,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -60,7 +55,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::LD:
case PPC::LWZ:
case PPC::LFS:
case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::LVX:
+ case PPC::RESTORE_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::STD:
case PPC::STW:
case PPC::STFS:
case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::STVX:
+ case PPC::SPILL_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -141,7 +148,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI)
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
@@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use X11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(PPC::X11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
@@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -616,25 +577,17 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- DestReg), FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
- FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::X11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
@@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
- case PPC::BL8_NOP_ELF:
- case PPC::BLA8_NOP_ELF:
+ case PPC::BL8_NOP:
+ case PPC::BLA8_NOP:
return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213ea435b..635e3480b06d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 6ee045a2c7c9..ab907622beeb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
]>;
def SDT_PPClbrx : SDTypeProfile<1, 2, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -53,32 +57,36 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
-def SDT_PPCnop : SDTypeProfile<0, 0, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
-def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
-// This sequence is used for long double->int conversions. It changes the
-// bits in the FPSCR which is not modelled.
-def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
- [SDNPOutGlue]>;
-def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
- [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
- SDTCisVT<3, f64>]>,
- [SDNPInGlue]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -91,6 +99,20 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+ [SDNPHasChain]>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -99,10 +121,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
-def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
-def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
- [SDNPHasChain, SDNPMayStore]>;
-
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -110,16 +128,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -130,13 +144,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -144,6 +154,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -167,6 +185,12 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to support medium and large code model
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
+
+
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -258,6 +282,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries then the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -294,9 +350,6 @@ def s16imm : Operand<i32> {
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
}
-def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
- let PrintMethod = "printS16X4ImmOperand";
-}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
@@ -324,26 +377,37 @@ def crbitm: Operand<i8> {
let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
+
+def dispRI : Operand<iPTR>;
+def dispRIX : Operand<iPTR>;
+
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
-// that doesn't matter.
-def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
- (ops (i32 20), (i32 zero_reg))> {
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
}
// Define PowerPC specific addressing mode.
@@ -352,9 +416,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -381,17 +448,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
- [(set GPRC:$result,
- (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
- def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
+ GPRC_NOR0:$T, GPRC_NOR0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
+ G8RC_NOX0:$T, G8RC_NOX0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
@@ -418,10 +490,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
- def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
- "b${p:cc}lr ${p:reg}", BrB,
- [(retflag)]>;
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ [(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
}
@@ -453,46 +524,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
}
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
- }
- let Uses = [CTR, RM] in {
- def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
}
}
-// SVR4 ABI Calls.
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL_SVR4 : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_SVR4 : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB,
- [(PPCcall_SVR4 (i32 imm:$func))]>;
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
- def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
}
}
-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -511,6 +565,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
[]>;
+let isCodeGenOnly = 1 in {
+
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
@@ -524,6 +580,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
"b $dst", BrB,
[]>;
+}
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
@@ -531,6 +588,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -566,93 +639,90 @@ let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+// Asm string carries the same '#' placeholder prefix as the other ATOMIC_* pseudos.
def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set GPRC:$dst,
- (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+// Asm string spelled to match the def name (upper-case I8), as for the I16/I32 variants.
def ATOMIC_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
- [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
- [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
- [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
}
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
"lwarx $rD, $src", LdStLWARX,
- [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+ [(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
"stwcx. $rS, $dst", LdStSTWCX,
- [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
@@ -666,94 +736,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load iaddr:$src))]>;
+ [(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
"lfs $rD, $src", LdStLFD,
- [(set F4RC:$rD, (load iaddr:$src))]>;
+ [(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
- [(set F8RC:$rD, (load iaddr:$src))]>;
+ [(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -763,32 +833,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load xaddr:$src))]>;
+ [(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFD,
- [(set F4RC:$frD, (load xaddr:$src))]>;
+ [(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
- [(set F8RC:$frD, (load xaddr:$src))]>;
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
//===----------------------------------------------------------------------===//
@@ -799,137 +876,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(store GPRC:$rS, iaddr:$src)]>;
+ [(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
"stfs $rS, $dst", LdStSTFD,
- [(store F4RC:$rS, iaddr:$dst)]>;
+ [(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
"stfd $rS, $dst", LdStSTFD,
- [(store F8RC:$rS, iaddr:$dst)]>;
+ [(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+// D-form stores with update. The first DForm_1 argument is the primary
+// opcode: stbu=39, sthu=45, stwu=37, stfsu=53, stfdu=55. The updated base
+// register is returned in $ea_res, tied to the base of $dst and not encoded.
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
// Indexed (r+r) Stores.
-//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(store GPRC:$rS, xaddr:$dst)]>,
- PPC970_DGroup_Cracked;
-
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ [(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
- (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
- (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
"stfiwx $frS, $dst", LdStSTFD,
- [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
"stfsx $frS, $dst", LdStSTFD,
- [(store F4RC:$frS, xaddr:$dst)]>;
+ [(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
"stfdx $frS, $dst", LdStSTFD,
- [(store F8RC:$frS, xaddr:$dst)]>;
+ [(store f64:$frS, xaddr:$dst)]>;
+}
+
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
+
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -939,68 +1007,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
PPC970_DGroup_Cracked;
def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
- [(set GPRC:$rD, (add GPRC:$rA,
+ [(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
- [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+ [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
- [(set GPRC:$rD, immSExt16:$imm)]>;
+ [(set i32:$rD, immSExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
"lis $rD, $imm", IntSimple,
- [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
@@ -1013,38 +1079,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
let PPC970_Unit = 1 in { // FXU Operations.
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"slw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"srw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
let Defs = [CARRY] in {
def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"sraw $rA, $rS, $rB", IntShift,
- [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
}
@@ -1052,17 +1118,17 @@ let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CARRY] in {
def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
"srawi $rA, $rS, $SH", IntShift,
- [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
}
def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
- [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+ [(set i32:$rA, (ctlz i32:$rS))]>;
def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
"cmpw $crD, $rA, $rB", IntCompare>;
@@ -1080,16 +1146,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
let Uses = [RM] in {
def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
"fctiwz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
"frsp $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ // The frin -> nearbyint mapping is valid only in fast-math mode.
+ def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fnearbyint f32:$frB))]>;
+
+ // These pseudos expand to rint but also set FE_INEXACT when the result does
+ // not equal the argument.
+ let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+ def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+ "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+ def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+ "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ }
+
+ def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
"fsqrt $frD, $frB", FPSqrt,
- [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
"fsqrts $frD, $frB", FPSqrt,
- [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
@@ -1099,31 +1203,44 @@ let Uses = [RM] in {
/// sneak into a d-group with a store).
def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
- []>, // (set F4RC:$frD, F4RC:$frB)
+ []>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+ [(set f32:$frD, (fabs f32:$frB))]>;
def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+ [(set f64:$frD, (fabs f64:$frB))]>;
def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+ [(set f32:$frD, (fneg f32:$frB))]>;
def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+ [(set f64:$frD, (fneg f64:$frB))]>;
+
+// Reciprocal estimates.
+def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fre $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fres $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frsqrte $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frsqrtes $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
-
// XL-Form instructions. condition register logical ops.
//
@@ -1141,6 +1258,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
"cror $CRD, $CRA, $CRB", BrCR,
[]>;
+let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
@@ -1158,6 +1276,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
"crxor 6, 6, 6", BrCR,
[(PPCcr6unset)]>;
}
+}
// XFX-Form instructions. Instructions that deal with SPRs.
//
@@ -1166,7 +1285,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1193,6 +1312,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1207,6 +1349,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
// instruction to keep the register allocator from becoming confused.
//
// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+let isCodeGenOnly = 1 in
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"#MFCRpseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1219,38 +1362,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
-// Instructions to manipulate FPSCR. Only long double handling uses these.
-// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IntMTFSB0,
- [(PPCmtfsb0 (i32 imm:$FM))]>,
+ "mtfsb0 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IntMTFSB0,
- [(PPCmtfsb1 (i32 imm:$FM))]>,
+ "mtfsb1 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- // MTFSF does not actually produce an FP result. We pretend it copies
- // input reg B to the output. If we didn't do this it would look like the
- // instruction had no outputs (because we aren't modelling the FPSCR) and
- // it would be deleted.
- def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
- (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
- "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
- [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
- F8RC:$rT, F8RC:$FRB))]>,
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+ "mtfsf $FM, $rT", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
"mffs $rT", IntMFFS,
- [(set F8RC:$rT, (PPCmffs))]>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
- def FADDrtz: AForm_2<63, 21,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ [(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1261,61 +1395,61 @@ let PPC970_Unit = 1 in { // FXU Operations.
//
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
}
def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divw $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divwu $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhwu $rT, $rA, $rB", IntMulHWU,
- [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mullw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
let Defs = [CARRY] in {
def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
}
def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"neg $rT, $rA", IntSimple,
- [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+ [(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
@@ -1327,43 +1461,41 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
- (fneg F8RC:$FRB))))]>;
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
- (fneg F4RC:$FRB))))]>;
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1372,50 +1504,50 @@ let Uses = [RM] in {
def FSELD : AForm_1<63, 23,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FSELS : AForm_1<63, 23,
(outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fadds $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
def FDIV : AForm_2<63, 18,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fdiv $FRT, $FRA, $FRB", FPDivD,
- [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
def FDIVS : AForm_2<59, 18,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fdivs $FRT, $FRA, $FRB", FPDivS,
- [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
def FMUL : AForm_3<63, 25,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
"fmul $FRT, $FRA, $FRC", FPFused,
- [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
def FMULS : AForm_3<59, 25,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
"fmuls $FRT, $FRA, $FRC", FPGeneral,
- [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fsub $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fsubs $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
@@ -1455,47 +1587,43 @@ def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not GPRC:$in),
- (NOR GPRC:$in, GPRC:$in)>;
+def NOT : Pat<(not i32:$in),
+ (NOR $in, $in)>;
// ADD an arbitrary immediate.
-def : Pat<(add GPRC:$in, imm:$imm),
- (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
// OR an arbitrary immediate.
-def : Pat<(or GPRC:$in, imm:$imm),
- (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// XOR an arbitrary immediate.
-def : Pat<(xor GPRC:$in, imm:$imm),
- (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// SUBFIC
-def : Pat<(sub immSExt16:$imm, GPRC:$in),
- (SUBFIC GPRC:$in, imm:$imm)>;
+def : Pat<(sub immSExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
// SHL/SRL
-def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
-def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
// ROTL
-def : Pat<(rotl GPRC:$in, GPRC:$sh),
- (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
-def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
// RLWNM
-def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
- (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
// Calls
-def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
- (BL_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
- (BL_Darwin texternalsym:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
- (BL_SVR4 tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
- (BL_SVR4 texternalsym:$dst)>;
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
@@ -1518,28 +1646,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
- (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
- (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS GPRC:$in, tglobaladdr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS GPRC:$in, tconstpool:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS GPRC:$in, tjumptable:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS GPRC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
-def : Pat<(sra GPRC:$rS, GPRC:$rB),
- (SRAW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(srl GPRC:$rS, GPRC:$rB),
- (SRW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(shl GPRC:$rS, GPRC:$rB),
- (SLW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
def : Pat<(zextloadi1 iaddr:$src),
(LBZ iaddr:$src)>;
@@ -1562,8 +1690,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fextend F4RC:$src)),
- (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
@@ -1575,5 +1703,15 @@ def : Pat<(membarrier (i32 imm /*ll*/),
def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index aba27399d6da..cfcd7490ed0d 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -15,10 +15,10 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Support/Memory.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -115,7 +115,7 @@ asm(
"lwz r2, 208(r1)\n" // stub's frame
"lwz r4, 8(r2)\n" // stub's lr
"li r5, 0\n" // 0 == 32 bit
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
"mtctr r3\n"
// Restore all int arg registers
"lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
@@ -178,7 +178,7 @@ asm(
"lwz 5, 104(1)\n" // stub's frame
"lwz 4, 4(5)\n" // stub's lr
"li 5, 0\n" // 0 == 32 bit
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"mtctr 3\n"
// Restore all int arg registers
"lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
@@ -259,10 +259,10 @@ asm(
"ld 4, 16(5)\n" // stub's lr
"li 5, 1\n" // 1 == 64 bit
#ifdef __ELF__
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"nop\n"
#else
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
#endif
"mtctr 3\n"
// Restore all int arg registers
@@ -292,9 +292,10 @@ void PPC64CompilationCallback() {
#endif
extern "C" {
-static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2f8243a597e6..46d4a08eb687 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -14,8 +14,8 @@
#ifndef POWERPC_JITINFO_H
#define POWERPC_JITINFO_H
-#include "llvm/Target/TargetJITInfo.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
class PPCTargetMachine;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 19ec993ba00f..9b0df3e86a75 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
@@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
+ case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+ break;
+ case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+ break;
+ case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+ break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 24caffa3f0f2..ee18eadf6e5f 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
@@ -71,11 +81,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR;
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
+ SpillsVRSAVE(false),
LRStoreRequired(false),
MinReservedArea(0),
TailCallSPDelta(0),
@@ -83,7 +99,8 @@ public:
VarArgsFrameIndex(0),
VarArgsStackOffset(0),
VarArgsNumGPR(0),
- VarArgsNumFPR(0) {}
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -105,9 +122,18 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
@@ -125,6 +151,9 @@ public:
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3f6..1d61a3a8eac2 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -15,63 +15,45 @@
#define DEBUG_TYPE "reginfo"
#include "PPCRegisterInfo.h"
#include "PPC.h"
+#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
- ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
}
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
-}
-
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
@@ -111,11 +93,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
- // For 32-bit SVR4, also initialize the frame index associated with
- // the CR spill slot.
- if (!Subtarget.isPPC64())
- CRSpillFrameIdx = 0;
-
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
@@ -128,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ // The naming here is inverted: The CSR_NoRegs_Altivec has the
+ // Altivec registers masked so that they're not saved and restored around
+ // instructions with this preserved mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ if (Subtarget.isDarwin())
+ return CSR_NoRegs_Darwin_RegMask;
+ return CSR_NoRegs_RegMask;
+}
+
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
- Reserved.set(PPC::R0);
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
Reserved.set(PPC::LR8);
@@ -144,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
- // Note that this is over conservative, as it also prevents allocation of R31
- // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
- Reserved.set(PPC::R31);
- Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- Reserved.set(PPC::X31);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -190,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -204,77 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-void PPCRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- I->getOpcode() == PPC::ADJCALLSTACKUP) {
- // Add (actually subtract) back the amount the callee popped on return.
- if (int CalleeAmt = I->getOperand(1).getImm()) {
- bool is64Bit = Subtarget.isPPC64();
- CalleeAmt *= -1;
- unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
- unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
-
- if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addImm(CalleeAmt);
- } else {
- MachineBasicBlock::iterator MBBI = I;
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
- .addImm(CalleeAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
- .addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addReg(TmpReg);
- }
- }
- }
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code will be in the general form
///
@@ -282,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
/// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation
///
-void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the instruction.
MachineInstr &MI = *II;
// Get the instruction's basic block.
@@ -315,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -346,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -398,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
/// stw rA, FI ; Store rA to the frame.
///
void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -424,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
- if (SrcReg != PPC::CR0)
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(getPPCRegisterNumbering(SrcReg) * 4)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
.addImm(0)
.addImm(31);
+ }
addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
- .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ .addReg(Reg, RegState::Kill),
FrameIndex);
// Discard the pseudo instruction.
@@ -441,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
}
void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -468,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
.addImm(31);
}
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(Reg);
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -489,18 +438,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we must
- // create exactly one stack slot and return its FrameIdx for all
- // nonvolatiles.
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64())
FrameIdx = 0;
- } else if (CRSpillFrameIdx) {
- FrameIdx = CRSpillFrameIdx;
- } else {
- MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
- FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
- CRSpillFrameIdx = FrameIdx;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
}
return true;
}
@@ -509,7 +454,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -523,20 +469,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- // Find out which operand is the frame index.
- unsigned FIOperandNo = 0;
- while (!MI.getOperand(FIOperandNo).isFI()) {
- ++FIOperandNo;
- assert(FIOperandNo != MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
// Take into account whether it's an add or mem instruction
- unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNo-1;
+ OffsetOperandNo = FIOperandNum-1;
// Get the frame index.
- int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Get the frame pointer save index. Users of this index are primarily
// DYNALLOC instructions.
@@ -548,25 +487,29 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special case for dynamic alloca.
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
- lowerDynamicAlloc(II, SPAdj, RS);
+ lowerDynamicAlloc(II);
return;
}
- // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
(is64Bit ? PPC::X31 : PPC::R31) :
(is64Bit ? PPC::X1 : PPC::R1),
false);
@@ -579,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case PPC::LWA:
case PPC::LD:
case PPC::STD:
- case PPC::STD_32:
isIXAddr = true;
break;
}
-
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -596,7 +542,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
@@ -606,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -616,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
.addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
- .addReg(SReg, RegState::Kill)
+ .addReg(SRegHi, RegState::Kill)
.addImm(Offset);
// Convert into indexed form of the instruction:
@@ -637,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -647,7 +595,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index a8fd796d9e97..7e6683eeb2ef 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef POWERPC32_REGISTERINFO_H
#define POWERPC32_REGISTERINFO_H
+#include "llvm/ADT/DenseMap.h"
#include "PPC.h"
-#include <map>
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
@@ -27,10 +27,9 @@ class TargetInstrInfo;
class Type;
class PPCRegisterInfo : public PPCGenRegisterInfo {
- std::map<unsigned, unsigned> ImmToIdxMap;
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
- mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@@ -45,31 +44,38 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 5ca387629b6c..57a25f5143fa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -27,178 +27,72 @@ class PPCReg<string n> : Register<n> {
// GPR - One of the 32 32-bit general-purpose registers
class GPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// GP8 - One of the 32 64-bit general-purpose registers
class GP8<GPR SubReg, string n> : PPCReg<n> {
- field bits<5> Num = SubReg.Num;
+ let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
class SPR<bits<10> num, string n> : PPCReg<n> {
- field bits<10> Num = num;
+ let HWEncoding{9-0} = num;
}
// FPR - One of the 32 64-bit floating-point registers
class FPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// VR - One of the 32 128-bit vector registers
class VR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
- field bits<3> Num = num;
+ let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// CRBIT - One of the 32 1-bit condition register fields
class CRBIT<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
-
// General-purpose registers
-def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
-def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
-def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
-def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
-def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
-def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
-def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
-def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
-def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
-def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
-def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
-def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
-def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
-def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
-def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
-def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
-def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
-def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
-def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
-def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
-def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
-def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
-def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
-def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
-def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
-def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
-def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
-def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
-def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
-def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
-def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
-def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
// 64-bit General-purpose registers
-def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
-def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
-def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
-def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
-def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
-def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
-def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
-def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
-def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
-def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
-def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
-def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
-def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
-def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
-def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
-def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
-def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
-def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
-def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
-def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
-def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
-def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
-def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
-def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
-def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
-def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
-def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
-def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
-def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
-def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
-def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
-def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
// Floating-point registers
-def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
-def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
-def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
-def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
-def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
-def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
-def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
-def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
-def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
-def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
-def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
-def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
-def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
-def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
-def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
-def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
-def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
-def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
-def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
-def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
-def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
-def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
-def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
-def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
-def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
-def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
-def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
-def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
-def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
-def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
-def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
-def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
// Vector registers
-def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
-def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
-def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
-def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
-def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
-def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
-def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
-def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
-def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
-def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
-def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
-def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
-def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
-def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
-def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
-def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
-def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
-def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
-def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
-def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
-def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
-def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
-def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
-def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
-def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
-def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
-def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
-def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
-def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
-def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
-def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
-def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+foreach Index = 0-31 in {
+ def V#Index : VR<Index, "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
@@ -278,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, LR)>;
+ R31, R0, R1, FP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, LR8)>;
+ X31, X13, X0, X1, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index ba63b5cd8faf..ae084aa0e8c1 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -749,3 +749,18 @@ def PPCA2Itineraries : ProcessorItineraries<
[15, 7],
[FPR_Bypass, FPR_Bypass]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+ let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 6;
+
+ let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 7c02ea099c14..c64998d52a0c 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -92,3 +92,18 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// G5 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = G5Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 9c8cb92cc7ea..a8f2b3f47d1b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
-#include "PPCRegisterInfo.h"
#include "PPC.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -36,9 +36,20 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
+ , HasQPX(false)
, HasFSQRT(false)
+ , HasFRE(false)
+ , HasFRES(false)
+ , HasFRSQRTE(false)
+ , HasFRSQRTES(false)
+ , HasRecipPrec(false)
, HasSTFIWX(false)
+ , HasLFIWAX(false)
+ , HasFPRND(false)
+ , HasFPCVT(false)
, HasISEL(false)
+ , HasPOPCNTD(false)
+ , HasLDBRX(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
@@ -82,6 +93,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
// Set up darwin-specific properties.
if (isDarwin())
HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
}
/// SetJITMode - This is called to inform the subtarget info that we are
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b9e22f43c39e..65b4d211fc6a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,9 +14,9 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -43,7 +43,12 @@ namespace PPC {
DIR_A2,
DIR_E500mc,
DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
DIR_PWR6,
+ DIR_PWR6X,
DIR_PWR7,
DIR_64
};
@@ -70,9 +75,17 @@ protected:
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
+ bool HasQPX;
bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasLDBRX;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -148,10 +161,21 @@ public:
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -160,6 +184,8 @@ public:
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3fc977ee2b41..fe851c1b6fb8 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -13,13 +13,13 @@
#include "PPCTargetMachine.h"
#include "PPC.h"
-#include "llvm/PassManager.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::
@@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
@@ -127,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index c168433a71b3..606ccb314126 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -15,14 +15,13 @@
#define PPC_TARGETMACHINE_H
#include "PPCFrameLowering.h"
-#include "PPCSubtarget.h"
-#include "PPCJITInfo.h"
-#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
namespace llvm {
@@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -66,17 +63,14 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 000000000000..2504ba70c25a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,240 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return PSK_FastHardware;
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fall back to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 12;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes)
+ Cost *= (SrcBytes/Alignment);
+
+ return Cost;
+}
+
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index b6763aa73802..cc2ff966332e 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -1,7 +1,6 @@
//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
TODO:
-* gpr0 allocation
* lmw/stmw pass a la arm load store optimizer for prolog/epilog
===-------------------------------------------------------------------------===
@@ -204,12 +203,6 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
===-------------------------------------------------------------------------===
-Implement Newton-Rhapson method for improving estimate instructions to the
-correct accuracy, and implementing divide as multiply by reciprocal when it has
-more than one use. Itanium would want this too.
-
-===-------------------------------------------------------------------------===
-
Compile offsets from allocas:
int *%test() {
@@ -536,20 +529,6 @@ void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
===-------------------------------------------------------------------------===
-Complete the signed i32 to FP conversion code using 64-bit registers
-transformation, good for PI. See PPCISelLowering.cpp, this comment:
-
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-Also, if the registers are spilled to the stack, we have to ensure that all
-64-bits of them are save/restored, otherwise we will miscompile the code. It
-sounds like we need to get the 64-bit register classes going.
-
-===-------------------------------------------------------------------------===
-
%struct.B = type { i8, [3 x i8] }
define void @bar(%struct.B* %b) {
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5dc8568d83f2..fa44331b8af6 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;