author    Dimitry Andric <dim@FreeBSD.org>    2019-08-21 18:13:02 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2019-08-21 18:13:02 +0000
commit    54db30ce18663e6c2991958f3b5d18362e8e93c4 (patch)
tree      4aa6442802570767398cc83ba484e97b1309bdc2 /contrib/llvm/lib/Target/PowerPC
parent    35284c22e9c8348159b7ce032ea45f2cdeb65298 (diff)
parent    e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Merge llvm trunk r366426, resolve conflicts, and update FREEBSD-Xlist.
Notes: svn path=/projects/clang900-import/; revision=351344
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 14
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 117
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp (renamed from contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp) | 26
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h (renamed from contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h) | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 17
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h | 14
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 37
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 14
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp | 29
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/P9InstrResources.td | 371
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.h | 22
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.td | 38
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 223
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp | 262
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCCState.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 585
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp | 162
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h | 36
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td | 50
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp | 19
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 108
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 211
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h | 31
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 94
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1087
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 117
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 66
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 37
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td | 21
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td | 49
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 388
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h | 100
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 82
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td | 19
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td | 531
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 17
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 186
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 16
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp | 83
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h | 49
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp | 52
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 217
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 18
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule.td | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td | 77
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 30
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h | 28
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 74
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 449
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 21
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/README_P9.txt | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h | 22
98 files changed, 4402 insertions(+), 2334 deletions(-)
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 8b3480f772e9..c9524da93acd 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1,15 +1,15 @@
//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -147,8 +147,7 @@ public:
: MCTargetAsmParser(Options, STI, MII) {
// Check for 64-bit vs. 32-bit pointer mode.
const Triple &TheTriple = STI.getTargetTriple();
- IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
- TheTriple.getArch() == Triple::ppc64le);
+ IsPPC64 = TheTriple.isPPC64();
IsDarwin = TheTriple.isMacOSX();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -1129,7 +1128,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
}
-static std::string PPCMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1148,7 +1147,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
case Match_MnemonicFail: {
- uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = PPCMnemonicSpellCheck(
((PPCOperand &)*Operands[0]).getToken(), FBS);
return Error(IDLoc, "invalid instruction" + Suggestion,
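
Two cleanups in the hunks above recur throughout this merge: the open-coded ppc64/ppc64le arch comparison collapses into Triple::isPPC64(), and raw uint64_t feature masks become FeatureBitset, which scales past 64 subtarget features. A minimal sketch of the Triple helper's behavior, assuming LLVM 9 headers; the triple strings are illustrative, not taken from the patch:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      // isPPC64() subsumes the check it replaces:
      //   getArch() == Triple::ppc64 || getArch() == Triple::ppc64le
      assert(llvm::Triple("powerpc64-unknown-freebsd").isPPC64());
      assert(llvm::Triple("powerpc64le-unknown-linux-gnu").isPPC64());
      assert(!llvm::Triple("powerpc-unknown-freebsd").isPPC64());
      return 0;
    }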
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index cce239cac970..7a8af57961cb 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -1,13 +1,13 @@
//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -86,12 +86,6 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
-static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, CRRegs);
-}
-
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index a405dd70c307..8778e916f7e4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -29,6 +28,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -52,6 +52,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
+ return 0;
case FK_Data_1:
return 1;
case FK_Data_2:
@@ -74,10 +76,12 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
namespace {
class PPCAsmBackend : public MCAsmBackend {
- const Target &TheTarget;
+protected:
+ Triple TT;
public:
- PPCAsmBackend(const Target &T, support::endianness Endian)
- : MCAsmBackend(Endian), TheTarget(T) {}
+ PPCAsmBackend(const Target &T, const Triple &TT)
+ : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
+ TT(TT) {}
unsigned getNumFixupKinds() const override {
return PPC::NumTargetFixupKinds;
@@ -136,9 +140,11 @@ public:
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
- switch ((PPC::Fixups)Fixup.getKind()) {
+ switch ((unsigned)Fixup.getKind()) {
default:
return false;
+ case FK_NONE:
+ return true;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
// If the target symbol has a local entry point we must not attempt
@@ -187,59 +193,76 @@ public:
return true;
}
-
- unsigned getPointerSize() const {
- StringRef Name = TheTarget.getName();
- if (Name == "ppc64" || Name == "ppc64le") return 8;
- assert(Name == "ppc32" && "Unknown target name!");
- return 4;
- }
};
} // end anonymous namespace
// FIXME: This should be in a separate file.
namespace {
- class DarwinPPCAsmBackend : public PPCAsmBackend {
- public:
- DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, support::big) { }
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- bool is64 = getPointerSize() == 8;
- return createPPCMachObjectWriter(
- /*Is64Bit=*/is64,
- (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
- MachO::CPU_SUBTYPE_POWERPC_ALL);
- }
- };
-
- class ELFPPCAsmBackend : public PPCAsmBackend {
- uint8_t OSABI;
- public:
- ELFPPCAsmBackend(const Target &T, support::endianness Endian,
- uint8_t OSABI)
- : PPCAsmBackend(T, Endian), OSABI(OSABI) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- bool is64 = getPointerSize() == 8;
- return createPPCELFObjectWriter(is64, OSABI);
- }
- };
+
+class DarwinPPCAsmBackend : public PPCAsmBackend {
+public:
+ DarwinPPCAsmBackend(const Target &T, const Triple &TT)
+ : PPCAsmBackend(T, TT) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ bool Is64 = TT.isPPC64();
+ return createPPCMachObjectWriter(
+ /*Is64Bit=*/Is64,
+ (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
+ }
+};
+
+class ELFPPCAsmBackend : public PPCAsmBackend {
+public:
+ ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ bool Is64 = TT.isPPC64();
+ return createPPCELFObjectWriter(Is64, OSABI);
+ }
+
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+};
+
+class XCOFFPPCAsmBackend : public PPCAsmBackend {
+public:
+ XCOFFPPCAsmBackend(const Target &T, const Triple &TT)
+ : PPCAsmBackend(T, TT) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createPPCXCOFFObjectWriter(TT.isArch64Bit());
+ }
+};
} // end anonymous namespace
+Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
+ if (TT.isPPC64()) {
+ if (Name == "R_PPC64_NONE")
+ return FK_NONE;
+ } else {
+ if (Name == "R_PPC_NONE")
+ return FK_NONE;
+ }
+ return MCAsmBackend::getFixupKind(Name);
+}
+
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSDarwin())
- return new DarwinPPCAsmBackend(T);
+ return new DarwinPPCAsmBackend(T, TT);
+
+ if (TT.isOSBinFormatXCOFF())
+ return new XCOFFPPCAsmBackend(T, TT);
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new ELFPPCAsmBackend(
- T, IsLittleEndian ? support::little : support::big, OSABI);
+ return new ELFPPCAsmBackend(T, TT);
}
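
The backend above now stores its Triple and derives endianness and pointer width from it rather than from the target name, gains an XCOFF sibling, and recognizes named no-op relocations: R_PPC64_NONE (or R_PPC_NONE on 32-bit) maps to FK_NONE, which shouldForceRelocation always emits. A standalone sketch of that lookup, with the LLVM types reduced to std::optional; the real override returns Optional<MCFixupKind> and defers to MCAsmBackend::getFixupKind:

    #include <optional>
    #include <string_view>

    enum FixupKind { FK_NONE };

    // Reduced model of ELFPPCAsmBackend::getFixupKind from the hunk above.
    std::optional<FixupKind> getFixupKind(std::string_view Name, bool IsPPC64) {
      if (Name == (IsPPC64 ? "R_PPC64_NONE" : "R_PPC_NONE"))
        return FK_NONE;    // a no-op relocation is still emitted at the site
      return std::nullopt; // fall back to the generic name lookup
    }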
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a3caf9a7a5ee..042ddf48d5df 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -134,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
+ case FK_NONE:
+ Type = ELF::R_PPC_NONE;
+ break;
case PPC::fixup_ppc_br24abs:
Type = ELF::R_PPC_ADDR24;
break;
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index dce443997ea5..845489788c86 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -1,9 +1,8 @@
//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 6824168b890d..0e64ae55ab1c 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrInfo.h"
@@ -445,13 +444,22 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
// On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
// come at the _end_ of the expression.
const MCOperand &Op = MI->getOperand(OpNo);
- const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
- O << refExp.getSymbol().getName();
+ const MCSymbolRefExpr *RefExp = nullptr;
+ const MCConstantExpr *ConstExp = nullptr;
+ if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Op.getExpr())) {
+ RefExp = cast<MCSymbolRefExpr>(BinExpr->getLHS());
+ ConstExp = cast<MCConstantExpr>(BinExpr->getRHS());
+ } else
+ RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
+
+ O << RefExp->getSymbol().getName();
O << '(';
printOperand(MI, OpNo+1, O);
O << ')';
- if (refExp.getKind() != MCSymbolRefExpr::VK_None)
- O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
+ if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
+ if (ConstExp != nullptr)
+ O << '+' << ConstExp->getValue();
}
/// showRegistersWithPercentPrefix - Check if this register name should be
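
printTLSCall previously assumed the call target was a bare MCSymbolRefExpr; the rewrite above also accepts symbol-plus-constant, an MCBinaryExpr whose LHS is the symbol reference and whose RHS is the addend, printed after the variant suffix. A sketch of that dissection, assuming LLVM 9 MC headers; the helper name is invented:

    #include "llvm/MC/MCExpr.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // Hypothetical helper mirroring the logic in printTLSCall: accept either
    // "sym" alone or "sym + addend" wrapped in an MCBinaryExpr.
    static void splitSymPlusAddend(const MCExpr *E, const MCSymbolRefExpr *&Sym,
                                   const MCConstantExpr *&Addend) {
      Addend = nullptr;
      if (const auto *Bin = dyn_cast<MCBinaryExpr>(E)) {
        Sym = cast<MCSymbolRefExpr>(Bin->getLHS());
        Addend = cast<MCConstantExpr>(Bin->getRHS());
      } else {
        Sym = cast<MCSymbolRefExpr>(E);
      }
    }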
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 351ccefa2da2..725ae2a7081b 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -1,9 +1,8 @@
//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
-#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstPrinter.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index fb7bf23509c7..5f0005ea1d7b 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,3 +81,9 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
UseIntegratedAssembler = true;
}
+void PPCXCOFFMCAsmInfo::anchor() {}
+
+PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
+ assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
+ CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index e252ac944d40..42cb62ad26a4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -1,13 +1,12 @@
//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file contains the declaration of the MCAsmInfoDarwin class.
+// This file contains the declarations of the PowerPC MCAsmInfo classes.
//
//===----------------------------------------------------------------------===//
@@ -16,6 +15,7 @@
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoXCOFF.h"
namespace llvm {
class Triple;
@@ -34,6 +34,13 @@ public:
explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
};
+class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
+ virtual void anchor();
+
+public:
+ explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
+};
+
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 8c15ade6f9c4..676efc500455 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -217,7 +216,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
const Triple &TT = STI.getTargetTriple();
- bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+ bool isPPC64 = TT.isPPC64();
return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index a4bcff4b9450..1324faa12553 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -1,9 +1,8 @@
//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,9 +98,10 @@ public:
unsigned getInstSizeInBytes(const MCInst &MI) const;
private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 32e6a0bdd65f..d467f5c4a439 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 8bb4791d13dd..449e2c34f74d 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -1,9 +1,8 @@
//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 78609ef3d4e0..90c3c8d20edb 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -48,9 +48,9 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "PPCGenRegisterInfo.inc"
-// Pin the vtable to this file.
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+// Pin the vtable to this file.
PPCTargetStreamer::~PPCTargetStreamer() = default;
static MCInstrInfo *createPPCMCInstrInfo() {
@@ -83,6 +83,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
+ else if (TheTriple.isOSBinFormatXCOFF())
+ MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple);
else
MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
@@ -235,6 +237,27 @@ public:
}
};
+class PPCTargetXCOFFStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
+ void emitTCEntry(const MCSymbol &S) override {
+ report_fatal_error("TOC entries not supported yet.");
+ }
+
+ void emitMachine(StringRef CPU) override {
+ llvm_unreachable("Machine pseudo-ops are invalid for XCOFF.");
+ }
+
+ void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("ABI-version pseudo-ops are invalid for XCOFF.");
+ }
+
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+ llvm_unreachable("Local-entry pseudo-ops are invalid for XCOFF.");
+ }
+};
+
} // end anonymous namespace
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
@@ -249,6 +272,8 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSBinFormatELF())
return new PPCTargetELFStreamer(S);
+ if (TT.isOSBinFormatXCOFF())
+ return new PPCTargetXCOFFStreamer(S);
return new PPCTargetMachOStreamer(S);
}
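
PPCMCTargetDesc now threads XCOFF through both MCAsmInfo creation and target-streamer selection, keyed off the triple's object-file format; the new PPCTargetXCOFFStreamer fails fast on every pseudo-op until XCOFF support matures. A reduced sketch of the dispatch shape, with illustrative names rather than LLVM's:

    #include <memory>

    enum class ObjFormat { ELF, MachO, XCOFF };

    struct TargetStreamer { virtual ~TargetStreamer() = default; };
    struct ELFStreamer : TargetStreamer {};
    struct MachOStreamer : TargetStreamer {};
    struct XCOFFStreamer : TargetStreamer {}; // stub: the real one aborts on use

    std::unique_ptr<TargetStreamer> createStreamer(ObjFormat F) {
      if (F == ObjFormat::ELF)
        return std::make_unique<ELFStreamer>();
      if (F == ObjFormat::XCOFF)
        return std::make_unique<XCOFFStreamer>();
      return std::make_unique<MachOStreamer>(); // Mach-O remains the fallback
    }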
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index d6e450cba0d7..74b67bd2e928 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ class Triple;
class StringRef;
class raw_pwrite_stream;
-Target &getThePPC32Target();
-Target &getThePPC64Target();
-Target &getThePPC64LETarget();
-
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
@@ -56,6 +51,9 @@ std::unique_ptr<MCObjectTargetWriter> createPPCELFObjectWriter(bool Is64Bit,
std::unique_ptr<MCObjectTargetWriter>
createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype);
+/// Construct a PPC XCOFF object writer.
+std::unique_ptr<MCObjectTargetWriter> createPPCXCOFFObjectWriter(bool Is64Bit);
+
/// Returns true iff Val consists of one contiguous run of 1s with any number of
/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index ff6cf584da23..4cf7fd15fa75 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index c2987b641c04..284e52c298a2 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -1,9 +1,8 @@
//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 481ba3f09cc7..d686a8ea2a22 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -1,9 +1,8 @@
//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..9c661286d455
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -0,0 +1,29 @@
+//===-- PPCXCOFFObjectWriter.cpp - PowerPC XCOFF Writer -------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter {
+
+public:
+ PPCXCOFFObjectWriter(bool Is64Bit);
+};
+} // end anonymous namespace
+
+PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
+ : MCXCOFFObjectTargetWriter(Is64Bit) {}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
+ return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
index 17c37964c562..2a10322d3f49 100644
--- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1,22 +1,21 @@
-//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
+//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file defines the resources required by P9 instructions. This is part
-// P9 processor model used for instruction scheduling. This file should contain
-// all of the instructions that may be used on Power 9. This is not just
-// instructions that are new on Power 9 but also instructions that were
+// This file defines the resources required by P9 instructions. This is part of
+// the P9 processor model used for instruction scheduling. This file should
+// contain all the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
// - Each CPU is made up of two superslices.
// - Each superslice is made up of two slices. Therefore, there are 4 slices
-// for each CPU.
+// for each CPU.
// - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
// - Each CPU has:
// - One CY (Crypto) unit P9_CY_*
@@ -33,9 +32,8 @@
// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
(instregex "VADDU(B|H|W|D)M$"),
(instregex "VAND(C)?$"),
@@ -85,9 +83,9 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
)>;
// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
-// slingle slice. However, since it is Restricted it requires all 3 dispatches
+// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
(instregex "TABORT(D|W)C(I)?$"),
(instregex "MTFSB(0|1)$"),
@@ -103,7 +101,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
(instrs
(instregex "XSMAX(C|J)?DP$"),
(instregex "XSMIN(C|J)?DP$"),
@@ -120,11 +118,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
)>;
// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instrs
(instregex "S(L|R)D$"),
(instregex "SRAD(I)?$"),
- (instregex "EXTSWSLI$"),
+ (instregex "EXTSWSLI_32_64$"),
(instregex "MFV(S)?RD$"),
(instregex "MTVSRD$"),
(instregex "MTVSRW(A|Z)$"),
@@ -160,6 +158,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
XSNEGDP,
XSCPSGNDP,
MFVSRWZ,
+ EXTSWSLI,
SRADI_32,
RLDIC,
RFEBB,
@@ -171,9 +170,9 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
)>;
// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
-// slingle slice. However, since it is Restricted it requires all 3 dispatches
-// (DISP) for that superslice.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// single slice. However, since it is Restricted, it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
(instregex "RLDC(L|R)$"),
(instregex "RLWIMI(8)?$"),
@@ -200,9 +199,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
(instregex "M(T|F)VSCR$"),
(instregex "VCMPNEZ(B|H|W)$"),
@@ -285,10 +283,9 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
)>;
// 7 cycle DP vector operation that uses an entire superslice.
-// Uses both DP units (the even DPE and odd DPO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
+// EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
VADDFP,
VCTSXS,
@@ -395,18 +392,17 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
VSUMSWS
)>;
-
// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-// dispatch units for the superslice.
-def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "MADD(HD|HDU|LD)$"),
+ (instregex "MADD(HD|HDU|LD|LD8)$"),
(instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
)>;
// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-// dispatch units for the superslice.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FRSP,
(instregex "FRI(N|P|Z|M)(D|S)$"),
@@ -448,26 +444,26 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations can be done in parallel.
-// The DP is restricted so we need a full 5 dispatches.
+// These operations can be done in parallel. The DP is restricted so we need a
+// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "FSEL(D|S)o$")
)>;
// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "MUL(H|L)(D|W)(U)?o$")
)>;
// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations must be done sequentially.
-// The DP is restricted so we need a full 5 dispatches.
+// These operations must be done sequentially.The DP is restricted so we need a
+// full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "FRI(N|P|Z|M)(D|S)o$"),
(instregex "FRE(S)?o$"),
@@ -483,8 +479,8 @@ def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
FRSPo
)>;
-// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
(instrs
XSADDDP,
XSADDSP,
@@ -520,9 +516,9 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
)>;
// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
(instrs
(instregex "LVS(L|R)$"),
(instregex "VSPLTIS(W|H|B)$"),
@@ -628,9 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
BCDSRo,
XSADDQP,
@@ -652,17 +648,17 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
BCDCTSQo
)>;
// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
XSMADDQP,
XSMADDQPO,
@@ -677,39 +673,39 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
BCDCFSQo
)>;
// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
XSDIVQP,
XSDIVQPO
)>;
// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
XSSQRTQP,
XSSQRTQPO
)>;
// 6 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "LXVL(L)?")
)>;
// 5 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "LVE(B|H|W)X$"),
(instregex "LVX(L)?"),
@@ -728,7 +724,7 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
)>;
// 4 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "DCB(F|T|ST)(EP)?$"),
(instregex "DCBZ(L)?(EP)?$"),
@@ -757,8 +753,8 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
)>;
// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
-// superslice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
LFIWZX,
LFDX,
@@ -768,7 +764,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C],
(instrs
SLBIA,
SLBIE,
@@ -782,17 +778,26 @@ def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C, DISP_PAIR_1C],
+ (instrs
+ (instregex "L(W|H)ZU(X)?(8)?$")
+)>;
+
+// Cracked TEND Instruction.
+// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations can be run in parallel.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C],
(instrs
- (instregex "L(W|H)ZU(X)?(8)?$"),
TEND
)>;
+
// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "ST(B|H|W|D)CX$")
)>;
@@ -800,16 +805,16 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
LDMX
)>;
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
(instregex "LHA(X)?(8)?$"),
(instregex "CP_PASTE(8)?o$"),
@@ -819,20 +824,19 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFIWAX
)>;
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
LXSIWAX,
LIWAX
@@ -844,7 +848,7 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFSX,
LFS
@@ -852,10 +856,9 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
LXSSP,
LXSSPX,
@@ -866,7 +869,7 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
(instrs
(instregex "LHAU(X)?(8)?$"),
LWAUX
@@ -874,12 +877,11 @@ def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
-// added. Requires 8 cycles.
-// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
-// as well as 3 dispatches for the PM. The Load requires the remaining 2
-// dispatches.
+// added. Requires 8 cycles. Since the PM requires the full superslice we need
+// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
+// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
LXVH8X,
LXVDSX,
@@ -887,8 +889,8 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
)>;
// Single slice Restricted store operation. The restricted operation requires
-// all three dispatches for the superslice.
-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// all three dispatches for the superslice.
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
(instregex "STF(S|D|IWX|SX|DX)$"),
(instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
@@ -905,10 +907,9 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// Vector Store Instruction
-// Requires the whole superslice and therefore requires all three dispatches
+// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
-def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "STVE(B|H|W)X$"),
(instregex "STVX(L)?$"),
@@ -916,18 +917,18 @@ def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
)>;
// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
-def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
(instrs
(instregex "MTCTR(8)?(loop)?$"),
(instregex "MTLR(8)?$")
)>;
// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
-def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
(instrs
(instregex "M(T|F)VRSAVE(v)?$"),
(instregex "M(T|F)PMR$"),
@@ -938,10 +939,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVW,
DIVWU,
@@ -949,10 +949,9 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
)>;
// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVWE,
DIVD,
@@ -964,29 +963,28 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
)>;
// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVDE,
DIVDEU
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-// and one full superslice for the DIV operation since there is only one DIV
-// per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_EVEN_1C, DISP_1C],
(instrs
(instregex "DIVW(U)?(O)?o$")
)>;
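
The latency figures in these cracked DIV-plus-ALU comments follow directly from the write names, assuming (as the names suggest) that each P9_IntDivAndALUOp write sequences the matching DIV write with the 2-cycle ALU write — hence 18 for the DIVW forms above and 26 and 42 for the blocks below:

  // Assumed composition: P9_IntDivAndALUOp_<N>C_8 = P9_DIV_<N-2>C_8 + P9_ALU_2C.
  static_assert(16 + 2 == 18, "DIVW(U)(O)o  : P9_DIV_16C_8 + P9_ALU_2C");
  static_assert(24 + 2 == 26, "DIVDo and kin: P9_DIV_24C_8 + P9_ALU_2C");
  static_assert(40 + 2 == 42, "DIVDE(U)o    : P9_DIV_40C_8 + P9_ALU_2C");
  int main() { return 0; }
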
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-// and one full superslice for the DIV operation since there is only one DIV
-// per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_EVEN_1C, DISP_1C],
(instrs
DIVDo,
DIVDUo,
@@ -995,10 +993,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-// and one full superslice for the DIV operation since there is only one DIV
-// per superslice. Latency of DIV plus ALU is 42.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_EVEN_1C, DISP_1C],
(instrs
DIVDEo,
DIVDEUo
@@ -1008,11 +1006,11 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 6 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches. ALU ops are
+// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
MTCRF,
MTCRF8
@@ -1020,11 +1018,11 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 4 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches. ALU ops are
+// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
(instregex "ADDC(8)?o$"),
(instregex "SUBFC(8)?o$")
@@ -1036,7 +1034,7 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
// One of the ALU ops is restricted, the other is not, so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "F(N)?ABS(D|S)o$"),
(instregex "FCPSGN(D|S)o$"),
@@ -1046,22 +1044,22 @@ def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 4 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
MCRFS
)>;
// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 6 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MTFSF(b|o)?$"),
(instregex "MTFSFI(o)?$")
@@ -1071,7 +1069,7 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "RLD(I)?C(R|L)o$"),
(instregex "RLW(IMI|INM|NM)(8)?o$"),
@@ -1086,7 +1084,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MFFS(L|CE|o)?$")
)>;
@@ -1095,143 +1093,141 @@ def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MFCR(8)?$")
)>;
// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
-def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
- (instregex "EXTSWSLIo$"),
+ (instregex "EXTSWSLI_32_64o$"),
(instregex "SRAD(I)?o$"),
+ EXTSWSLIo,
SLDo,
SRDo,
RLDICo
)>;
// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FDIV
)>;
// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FDIVo
)>;
// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
(instrs
XSSQRTDP
)>;
// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FSQRT
)>;
// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVSQRTDP
)>;
// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVSQRTSP
)>;
// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FSQRTo
)>;
// 26 Cycle DP Instruction.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
(instrs
XSSQRTSP
)>;
// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FSQRTS
)>;
// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FSQRTSo
)>;
-// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
(instrs
XSDIVDP
)>;
// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FDIVS
)>;
// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FDIVSo
)>;
-// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
(instrs
XSDIVSP
)>;
// 24 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-// superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVDIVSP
)>;
// 33 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-// superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVDIVDP
)>;
// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
-// latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
+// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
-// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// a total of 7 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "LF(SU|SUX)$")
)>;
@@ -1240,7 +1236,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
// the store and so it can be run at the same time as the store. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "STF(S|D)U(X)?$"),
(instregex "ST(B|H|W|D)U(X)?(8)?$")
@@ -1249,20 +1245,19 @@ def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C, DISP_PAIR_1C],
(instrs
(instregex "LBZU(X)?(8)?$"),
(instregex "LDU(X)?$")
)>;
-
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
-// the load and so it can be run at the same time as the load. The load is also
-// restricted. 3 dispatches are from the restricted load while the other two
-// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
-// is required for the ALU.
+// the load and so it can be run at the same time as the load. The load is also
+// restricted. 3 dispatches are from the restricted load while the remaining
+// one is from the ALU. The AGEN pipeline is from the load and the EXEC
+// pipeline is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "LF(DU|DUX)$")
)>;
@@ -1270,9 +1265,9 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
// Crypto Instructions
// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
(instrs
(instregex "VPMSUM(B|H|W|D)$"),
(instregex "V(N)?CIPHER(LAST)?$"),
@@ -1282,14 +1277,14 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
// Branch Instructions
// Two Cycle Branch
-def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BR_2C, DISP_BR_1C],
(instrs
(instregex "BCCCTR(L)?(8)?$"),
(instregex "BCCL(A|R|RL)?$"),
(instregex "BCCTR(L)?(8)?(n)?$"),
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
- (instregex "BL(_TLS)?$"),
+ (instregex "BL(_TLS|_NOP)?$"),
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
(instregex "BLA(8|8_NOP)?$"),
(instregex "BLR(8|L)?$"),
@@ -1313,8 +1308,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
-def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
(instrs
ADDPCIS
)>;
@@ -1324,17 +1318,15 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
// Atomic Load
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
- IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C],
+ IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
+ DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
(instregex "L(D|W)AT$")
)>;
// Atomic Store
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
- IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C],
+ IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "ST(D|W)AT$")
)>;
@@ -1406,6 +1398,7 @@ def : InstRW<[],
MBAR,
MSYNC,
SLBSYNC,
+ SLBFEEo,
NAP,
STOP,
TRAP,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index bfc613af3dc0..c6951ab67b08 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -1,9 +1,8 @@
//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,6 @@
#define LLVM_LIB_TARGET_POWERPC_PPC_H
#include "llvm/Support/CodeGen.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
@@ -57,12 +55,26 @@ namespace llvm {
MCOperand &OutMO, AsmPrinter &AP,
bool isDarwin);
+ void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+ void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
+ void initializePPCLoopPreIncPrepPass(PassRegistry&);
+ void initializePPCTOCRegDepsPass(PassRegistry&);
+ void initializePPCEarlyReturnPass(PassRegistry&);
+ void initializePPCVSXCopyPass(PassRegistry&);
void initializePPCVSXFMAMutatePass(PassRegistry&);
+ void initializePPCVSXSwapRemovalPass(PassRegistry&);
+ void initializePPCReduceCRLogicalsPass(PassRegistry&);
+ void initializePPCBSelPass(PassRegistry&);
+ void initializePPCBranchCoalescingPass(PassRegistry&);
+ void initializePPCQPXLoadSplatPass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
void initializePPCTLSDynamicCallPass(PassRegistry &);
void initializePPCMIPeepholePass(PassRegistry&);
+
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 98e6e98e6974..8e94a2ae15e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -1,9 +1,8 @@
//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,6 +135,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureTwoConstNR :
+ SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true",
+                     "Requires two constant Newton-Raphson computations">;
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
@@ -162,8 +164,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
-def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
- "Target supports add/load integer fusion.">;
+def FeaturePPCPreRASched:
+ SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
+ "Use PowerPC pre-RA scheduling strategy">;
+def FeaturePPCPostRASched:
+ SubtargetFeature<"ppc-postra-sched", "UsePPCPostRASchedStrategy", "true",
+ "Use PowerPC post-RA scheduling strategy">;
def FeatureFloat128 :
SubtargetFeature<"float128", "HasFloat128", "true",
"Enable the __float128 data type for IEEE-754R Binary128.",
@@ -191,6 +197,13 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
"Enable POWER9 vector instructions",
[FeatureISA3_0, FeatureP8Vector,
FeatureP9Altivec]>;
+// A separate feature is used for this even though it is equivalent to
+// P9Vector, because it is a feature of the implementation rather than the
+// architecture and may go away with future CPUs.
+def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
+ "VectorsUseTwoUnits",
+ "true",
+ "Vectors use two units">;
// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
@@ -215,15 +228,15 @@ def ProcessorFeatures {
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
FeatureBPERMD, FeatureExtDiv,
- FeatureMFTB, DeprecatedDST];
+ FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
list<SubtargetFeature> Power8SpecificFeatures =
[DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
- FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
- FeatureFusion];
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
list<SubtargetFeature> Power8FeatureList =
!listconcat(Power7FeatureList, Power8SpecificFeatures);
list<SubtargetFeature> Power9SpecificFeatures =
- [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+ [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
+ FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched];
list<SubtargetFeature> Power9FeatureList =
!listconcat(Power8FeatureList, Power9SpecificFeatures);
}
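
The !listconcat idiom above expresses that each processor generation inherits the previous generation's full feature list and appends only its own additions. A rough C++ analogue (illustration only, with a few feature strings borrowed from the definitions in this file):

  #include <string>
  #include <vector>

  int main() {
    using Features = std::vector<std::string>;
    const Features Power8FeatureList = {"power8-altivec", "htm"};
    const Features Power9SpecificFeatures = {"power9-vector", "ppc-prera-sched",
                                             "ppc-postra-sched"};
    // !listconcat(Power8FeatureList, Power9SpecificFeatures):
    Features Power9FeatureList = Power8FeatureList; // inherit everything
    Power9FeatureList.insert(Power9FeatureList.end(),
                             Power9SpecificFeatures.begin(),
                             Power9SpecificFeatures.end());
    return 0;
  }
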
@@ -279,10 +292,9 @@ def getNonRecordFormOpcode : InstrMapping {
def getAltVSXFMAOpcode : InstrMapping {
let FilterClass = "AltVSXFMARel";
- // Instructions with the same BaseName and Interpretation64Bit values
- // form a row.
+ // Instructions with the same BaseName value form a row.
let RowFields = ["BaseName"];
- // Instructions with the same RC value form a column.
+ // Instructions with the same IsVSXFMAAlt value form a column.
let ColFields = ["IsVSXFMAAlt"];
// The key column are the (default) addend-killing instructions.
let KeyCol = ["0"];
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 04aa3c9b1e22..bd87ce06b4fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
@@ -26,6 +25,7 @@
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -95,68 +95,102 @@ public:
return AsmPrinter::doInitialization(M);
}
- void EmitInstruction(const MachineInstr *MI) override;
+ void EmitInstruction(const MachineInstr *MI) override;
+
+ /// This function is for PrintAsmOperand and PrintAsmMemoryOperand,
+ /// invoked by EmitMSInlineAsmStr and EmitGCCInlineAsmStr only.
+  /// The \p MI is always an INLINEASM instruction.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
+
+ void EmitEndOfAsmFile(Module &M) override;
+
+ void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+ void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
+ void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ bool Changed = AsmPrinter::runOnMachineFunction(MF);
+ emitXRayTable();
+ return Changed;
+ }
+};
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ StringRef getPassName() const override {
+ return "Linux PPC Assembly Printer";
+ }
- void EmitEndOfAsmFile(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
- void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
- void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
- void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<PPCSubtarget>();
- bool Changed = AsmPrinter::runOnMachineFunction(MF);
- emitXRayTable();
- return Changed;
- }
- };
+ void EmitFunctionEntryLabel() override;
- /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
- class PPCLinuxAsmPrinter : public PPCAsmPrinter {
- public:
- explicit PPCLinuxAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+};
- StringRef getPassName() const override {
- return "Linux PPC Assembly Printer";
- }
+/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+/// OS X
+class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
+ StringRef getPassName() const override {
+ return "Darwin PPC Assembly Printer";
+ }
- void EmitFunctionEntryLabel() override;
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
+};
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitInstruction(const MachineInstr *MI) override;
- };
+class PPCAIXAsmPrinter : public PPCAsmPrinter {
+public:
+ PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
- /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
- /// OS X
- class PPCDarwinAsmPrinter : public PPCAsmPrinter {
- public:
- explicit PPCDarwinAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
+ StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+};
- StringRef getPassName() const override {
- return "Darwin PPC Assembly Printer";
- }
+} // end anonymous namespace
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
- };
+void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (Subtarget->hasLazyResolverStub(GV)) {
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+ SymToPrint);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+ !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = getSymbol(GV);
+ }
-} // end anonymous namespace
+ SymToPrint->print(O, MAI);
+
+ printOffset(MO.getOffset(), O);
+}
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
@@ -165,10 +199,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
- MO.getReg(), OpNo);
-
- const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+    // The MI is always INLINEASM here, so UseVSXReg is always false.
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
@@ -192,26 +224,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress: {
- // Computing the address of a global symbol, not calling it.
- const GlobalValue *GV = MO.getGlobal();
- MCSymbol *SymToPrint;
-
- // External or weakly linked global variables need non-lazily-resolved stubs
- if (Subtarget->hasLazyResolverStub(GV)) {
- SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
- SymToPrint);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
- !GV->hasInternalLinkage());
- } else {
- SymToPrint = getSymbol(GV);
- }
-
- SymToPrint->print(O, MAI);
-
- printOffset(MO.getOffset(), O);
+ PrintSymbolOperand(MO, O);
return;
}
@@ -224,7 +237,6 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -233,9 +245,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
- case 'c': // Don't print "$" before a global var name or constant.
- break; // PPC never has a prefix.
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
case 'L': // Write second word of DImode reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNo).isReg() ||
@@ -277,7 +287,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// assembler operand.
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
@@ -460,6 +469,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ const Module *M = MF->getFunction().getParent();
assert(MI->getOperand(0).isReg() &&
((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
@@ -473,8 +483,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
isPositionIndependent())
Kind = MCSymbolRefExpr::VK_PLT;
- const MCSymbolRefExpr *TlsRef =
+ const MCExpr *TlsRef =
MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
+
+  // Add a 32768 offset to the symbol so that we follow the latest GOT/PLT ABI.
+ if (Kind == MCSymbolRefExpr::VK_PLT && Subtarget->isSecurePlt() &&
+ M->getPICLevel() == PICLevel::BigPIC)
+ TlsRef = MCBinaryExpr::createAdd(
+ TlsRef, MCConstantExpr::create(32768, OutContext), OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
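
The 32768 addend is worth unpacking. Assuming the usual powerpc secure-PLT convention (not spelled out in this patch), the BigPIC GOT pointer is biased to .LTOC = _GLOBAL_OFFSET_TABLE_ + 0x8000 so that signed 16-bit displacements cover the whole 64 KiB GOT, and a @plt reference therefore needs the bias added back. A tiny sketch of the displacement arithmetic:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const int32_t Bias = 0x8000; // assumed: r30 = _GLOBAL_OFFSET_TABLE_ + Bias
    // A signed 16-bit displacement reaches [-32768, 32767]; the bias shifts
    // that window to [0, 65535] bytes from the start of the GOT.
    std::printf("reachable GOT bytes: [%d, %d]\n",
                INT16_MIN + Bias, INT16_MAX + Bias);
    return 0;
  }
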
@@ -576,34 +592,30 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Into: lwz %rt, .L0$poff - .L0$pb(%ri)
// add %rd, %rt, %ri
// or into (if secure plt mode is on):
- // addis r30, r30, .LTOC - .L0$pb@ha
- // addi r30, r30, .LTOC - .L0$pb@l
+  //        addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE_} - .L0$pb@ha
+  //        addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE_} - .L0$pb@l
// Get the offset from the GOT Base Register to the GOT
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
unsigned PICR = TmpInst.getOperand(0).getReg();
- MCSymbol *LTOCSymbol = OutContext.getOrCreateSymbol(StringRef(".LTOC"));
+ MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
+ M->getPICLevel() == PICLevel::SmallPIC ? "_GLOBAL_OFFSET_TABLE_"
+ : ".LTOC");
const MCExpr *PB =
- MCSymbolRefExpr::create(MF->getPICBaseSymbol(),
- OutContext);
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
- const MCExpr *LTOCDeltaExpr =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(LTOCSymbol, OutContext),
- PB, OutContext);
+ const MCExpr *DeltaExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext);
- const MCExpr *LTOCDeltaHi =
- PPCMCExpr::createHa(LTOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(PICR)
- .addReg(PICR)
- .addExpr(LTOCDeltaHi));
+ const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext);
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi));
- const MCExpr *LTOCDeltaLo =
- PPCMCExpr::createLo(LTOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
- .addReg(PICR)
- .addReg(PICR)
- .addExpr(LTOCDeltaLo));
+ const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext);
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo));
return;
} else {
MCSymbol *PICOffset =
@@ -1640,6 +1652,9 @@ createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
if (tm.getTargetTriple().isMacOSX())
return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+ if (tm.getTargetTriple().isOSAIX())
+ return new PPCAIXAsmPrinter(tm, std::move(Streamer));
+
return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 55e105dad0e5..104cf2ba3c00 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -1,9 +1,8 @@
//===- PPCBoolRetToInt.cpp ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index bbb977f090c5..5e9a661f8f0b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -1,9 +1,8 @@
//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -34,10 +33,6 @@ STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
-namespace llvm {
- void initializePPCBranchCoalescingPass(PassRegistry&);
-}
-
//===----------------------------------------------------------------------===//
// PPCBranchCoalescing
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 0d1bb9297bcb..793d690baec3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -1,9 +1,8 @@
//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,16 +25,13 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
STATISTIC(NumExpanded, "Number of branches expanded to long format");
-namespace llvm {
- void initializePPCBSelPass(PassRegistry&);
-}
-
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
@@ -48,6 +44,17 @@ namespace {
// size that is due to potential padding.
std::vector<std::pair<unsigned, unsigned>> BlockSizes;
+  // Number of the first block that has an imprecise instruction address.
+ int FirstImpreciseBlock = -1;
+
+ unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset);
+ unsigned ComputeBlockSizes(MachineFunction &Fn);
+ void modifyAdjustment(MachineFunction &Fn);
+ int computeBranchSize(MachineFunction &Fn,
+ const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dest,
+ unsigned BrOffset);
+
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -70,43 +77,47 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
return new PPCBSel();
}
-bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
- const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
- // Give the blocks of the function a dense, in-order, numbering.
- Fn.RenumberBlocks();
- BlockSizes.resize(Fn.getNumBlockIDs());
-
- auto GetAlignmentAdjustment =
- [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
- unsigned Align = MBB.getAlignment();
- if (!Align)
- return 0;
-
- unsigned AlignAmt = 1 << Align;
- unsigned ParentAlign = MBB.getParent()->getAlignment();
-
- if (Align <= ParentAlign)
- return OffsetToAlignment(Offset, AlignAmt);
-
- // The alignment of this MBB is larger than the function's alignment, so we
- // can't tell whether or not it will insert nops. Assume that it will.
- return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
- };
+/// In order to make MBB aligned, we need to add an adjustment value to the
+/// original Offset.
+unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
+ unsigned Offset) {
+ unsigned Align = MBB.getAlignment();
+ if (!Align)
+ return 0;
+
+ unsigned AlignAmt = 1 << Align;
+ unsigned ParentAlign = MBB.getParent()->getAlignment();
+
+ if (Align <= ParentAlign)
+ return OffsetToAlignment(Offset, AlignAmt);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not it will insert nops. Assume that it will.
+ if (FirstImpreciseBlock < 0)
+ FirstImpreciseBlock = MBB.getNumber();
+ return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+}
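
GetAlignmentAdjustment above computes how many padding bytes a block's alignment adds at a given offset. A standalone sketch (not LLVM code) of the same arithmetic, with OffsetToAlignment re-derived from its round-up-to-boundary behaviour:

  #include <cstdio>

  // Bytes needed to bring Offset up to the next multiple of AlignAmt
  // (mirrors what OffsetToAlignment returns for power-of-two amounts).
  static unsigned offsetToAlignment(unsigned Offset, unsigned AlignAmt) {
    return (AlignAmt - Offset % AlignAmt) % AlignAmt;
  }

  static unsigned alignmentAdjustment(unsigned BlockAlignLog2,
                                      unsigned FuncAlignLog2, unsigned Offset) {
    if (!BlockAlignLog2)
      return 0; // block has no alignment requirement of its own
    unsigned AlignAmt = 1u << BlockAlignLog2;
    if (BlockAlignLog2 <= FuncAlignLog2)
      return offsetToAlignment(Offset, AlignAmt);
    // Aligned more strictly than the function itself: pessimistically assume
    // a full extra AlignAmt bytes of nops, as the code above does.
    return AlignAmt + offsetToAlignment(Offset, AlignAmt);
  }

  int main() {
    // A block with .p2align 4 at offset 0x10c of a 16-byte-aligned function
    // needs 4 bytes of padding (0x10c -> 0x110).
    std::printf("%u\n", alignmentAdjustment(4, 4, 0x10c)); // prints 4
    return 0;
  }
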
- // We need to be careful about the offset of the first block in the function
- // because it might not have the function's alignment. This happens because,
- // under the ELFv2 ABI, for functions which require a TOC pointer, we add a
- // two-instruction sequence to the start of the function.
- // Note: This needs to be synchronized with the check in
- // PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+/// We need to be careful about the offset of the first block in the function
+/// because it might not have the function's alignment. This happens because,
+/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a
+/// two-instruction sequence to the start of the function.
+/// Note: This needs to be synchronized with the check in
+/// PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+static inline unsigned GetInitialOffset(MachineFunction &Fn) {
unsigned InitialOffset = 0;
if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
!Fn.getRegInfo().use_empty(PPC::X2))
InitialOffset = 8;
+ return InitialOffset;
+}
+
+/// Measure each MBB and compute a size for the entire function.
+unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ unsigned FuncSize = GetInitialOffset(Fn);
- // Measure each MBB and compute a size for the entire function.
- unsigned FuncSize = InitialOffset;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock *MBB = &*MFI;
@@ -124,13 +135,145 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
}
unsigned BlockSize = 0;
- for (MachineInstr &MI : *MBB)
+ for (MachineInstr &MI : *MBB) {
BlockSize += TII->getInstSizeInBytes(MI);
+ if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
+ FirstImpreciseBlock = MBB->getNumber();
+ }
BlockSizes[MBB->getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
+ return FuncSize;
+}
+
+/// Recompute the alignment-padding contributions to the block sizes.
+void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
+ unsigned Offset = GetInitialOffset(Fn);
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+
+ if (MBB->getNumber() > 0) {
+ auto &BS = BlockSizes[MBB->getNumber()-1];
+ BS.first -= BS.second;
+ Offset -= BS.second;
+
+ unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+
+ BS.first += AlignExtra;
+ BS.second = AlignExtra;
+
+ Offset += AlignExtra;
+ }
+
+ Offset += BlockSizes[MBB->getNumber()].first;
+ }
+}
+
+/// Determine the offset from the branch in Src block to the Dest block.
+/// BrOffset is the offset of the branch instruction inside Src block.
+int PPCBSel::computeBranchSize(MachineFunction &Fn,
+ const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dest,
+ unsigned BrOffset) {
+ int BranchSize;
+ unsigned MaxAlign = 2;
+ bool NeedExtraAdjustment = false;
+ if (Dest->getNumber() <= Src->getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = BrOffset;
+ MaxAlign = std::max(MaxAlign, Src->getAlignment());
+
+ int DestBlock = Dest->getNumber();
+ BranchSize += BlockSizes[DestBlock].first;
+ for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
+ BranchSize += BlockSizes[i].first;
+ MaxAlign = std::max(MaxAlign,
+ Fn.getBlockNumbered(i)->getAlignment());
+ }
+
+ NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+ (DestBlock >= FirstImpreciseBlock);
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ unsigned StartBlock = Src->getNumber();
+ BranchSize = BlockSizes[StartBlock].first - BrOffset;
+
+ MaxAlign = std::max(MaxAlign, Dest->getAlignment());
+ for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
+ BranchSize += BlockSizes[i].first;
+ MaxAlign = std::max(MaxAlign,
+ Fn.getBlockNumbered(i)->getAlignment());
+ }
+
+ NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+ (Src->getNumber() >= FirstImpreciseBlock);
+ }
+
+  // We tend to overestimate code size due to large alignments and inline
+  // assembly. Usually that yields a computed branch offset that is larger
+  // than the actual one, but it can also yield a smaller computed offset
+  // than the actual branch offset. If the offset is close to the limit of
+  // the encoding, that can cause problems at run time.
+  // The following is a simplified example.
+ //
+ // actual estimated
+ // address address
+ // ...
+ // bne Far 100 10c
+ // .p2align 4
+ // Near: 110 110
+ // ...
+ // Far: 8108 8108
+ //
+ // Actual offset: 0x8108 - 0x100 = 0x8008
+ // Computed offset: 0x8108 - 0x10c = 0x7ffc
+ //
+ // This example also shows when we can get the largest gap between
+ // estimated offset and actual offset. If there is an aligned block
+  // ABB between the branch and the target, assume its alignment is <align>
+  // bits. Now consider the accumulated function size FSIZE up to the end of
+  // the previous block PBB. If the estimated FSIZE is a multiple of
+  // 2^<align>, we don't need any padding for the estimated address of ABB.
+  // If the actual FSIZE at the end of PBB is 4 bytes more than a multiple
+  // of 2^<align>, then we need (2^<align> - 4) bytes of padding, which also
+  // means the actual branch offset is (2^<align> - 4) bytes larger than the
+  // computed offset. Any other actual FSIZE needs fewer padding bytes and
+  // so causes a smaller gap between the actual and computed offsets.
+ //
+  // On the other hand, if inline asm or a large alignment occurs between
+  // the branch block and the destination block, the estimated address can
+  // be <delta> larger than the actual address. If padding bytes are needed
+  // for a later aligned block, the actual number of padding bytes is at
+  // most <delta> more than the estimated number, so the actual address of
+  // the aligned block is less than or equal to its estimated address, and
+  // hence the actual branch offset is less than or equal to the computed
+  // branch offset.
+ //
+  // The computed offset is at most ((1 << alignment) - 4) bytes smaller
+  // than the actual offset, so we add this amount to the offset for safety.
+ if (NeedExtraAdjustment)
+ BranchSize += (1 << MaxAlign) - 4;
+
+ return BranchSize;
+}
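
A quick standalone check of the arithmetic in the worked example above; the 12-byte gap between the actual and the computed offset is exactly (1 << 4) - 4 for the intervening .p2align 4 block:

  #include <cassert>

  int main() {
    const int Actual   = 0x8108 - 0x100; // 0x8008, actual branch offset
    const int Computed = 0x8108 - 0x10c; // 0x7ffc, computed branch offset
    assert(Actual - Computed == (1 << 4) - 4); // 12-byte worst-case gap
    return 0;
  }
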
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+ FirstImpreciseBlock = -1;
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = ComputeBlockSizes(Fn);
+
// If the entire function is smaller than the displacement of a branch field,
// we know we don't need to shrink any branches in this function. This is a
// common case.
@@ -178,23 +321,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// Determine the offset from the current branch to the destination
// block.
- int BranchSize;
- if (Dest->getNumber() <= MBB.getNumber()) {
- // If this is a backwards branch, the delta is the offset from the
- // start of this block to this branch, plus the sizes of all blocks
- // from this block to the dest.
- BranchSize = MBBStartOffset;
-
- for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i].first;
- } else {
- // Otherwise, add the size of the blocks between this block and the
- // dest to the number of bytes left in this block.
- BranchSize = -MBBStartOffset;
-
- for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i].first;
- }
+ int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset);
// If this branch is in range, ignore it.
if (isInt<16>(BranchSize)) {
@@ -253,26 +380,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
if (MadeChange) {
// If we're going to iterate again, make sure we've updated our
// padding-based contributions to the block sizes.
- unsigned Offset = InitialOffset;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
-
- if (MBB->getNumber() > 0) {
- auto &BS = BlockSizes[MBB->getNumber()-1];
- BS.first -= BS.second;
- Offset -= BS.second;
-
- unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
-
- BS.first += AlignExtra;
- BS.second = AlignExtra;
-
- Offset += AlignExtra;
- }
-
- Offset += BlockSizes[MBB->getNumber()].first;
- }
+ modifyAdjustment(Fn);
}
EverMadeChange |= MadeChange;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp
index 5510a95430f5..5116f0d121f4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp
@@ -1,9 +1,8 @@
//===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCCState.h b/contrib/llvm/lib/Target/PowerPC/PPCCCState.h
index 9be9f11dbea3..e3499597474c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCCState.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCCState.h
@@ -1,9 +1,8 @@
//===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 6b9e2383e36f..2b8d9b87724f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,9 +1,8 @@
//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -72,70 +71,7 @@ using namespace llvm;
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
-// The latency of mtctr is only justified if there are more than 4
-// comparisons that will be removed as a result.
-static cl::opt<unsigned>
-SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
- cl::desc("Loops with a constant trip count smaller than "
- "this value will not use the count register."));
-
-STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
-
-namespace llvm {
- void initializePPCCTRLoopsPass(PassRegistry&);
-#ifndef NDEBUG
- void initializePPCCTRLoopsVerifyPass(PassRegistry&);
-#endif
-}
-
namespace {
- struct PPCCTRLoops : public FunctionPass {
-
-#ifndef NDEBUG
- static int Counter;
-#endif
-
- public:
- static char ID;
-
- PPCCTRLoops() : FunctionPass(ID) {
- initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- private:
- bool mightUseCTR(BasicBlock *BB);
- bool convertToCTRLoop(Loop *L);
-
- private:
- const PPCTargetMachine *TM;
- const PPCSubtarget *STI;
- const PPCTargetLowering *TLI;
- const DataLayout *DL;
- const TargetLibraryInfo *LibInfo;
- const TargetTransformInfo *TTI;
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- bool PreserveLCSSA;
- TargetSchedModel SchedModel;
- };
-
- char PPCCTRLoops::ID = 0;
-#ifndef NDEBUG
- int PPCCTRLoops::Counter = 0;
-#endif
#ifndef NDEBUG
struct PPCCTRLoopsVerify : public MachineFunctionPass {
@@ -161,16 +97,6 @@ namespace {
#endif // NDEBUG
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
- false, false)
-
-FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
-
#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
"PowerPC CTR Loops Verify", false, false)
@@ -183,511 +109,6 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
}
#endif // NDEBUG
-bool PPCCTRLoops::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- TM = &TPC->getTM<PPCTargetMachine>();
- STI = TM->getSubtargetImpl(F);
- TLI = STI->getTargetLowering();
-
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- bool MadeChange = false;
-
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- if (!L->getParentLoop())
- MadeChange |= convertToCTRLoop(L);
- }
-
- return MadeChange;
-}
-
-static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
-
- return false;
-}
-
-// Determining the address of a TLS variable results in a function call in
-// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
- const auto *GV = dyn_cast<GlobalValue>(MemAddr);
- if (!GV) {
- // Recurse to check for constants that refer to TLS global variables.
- if (const auto *CV = dyn_cast<Constant>(MemAddr))
- for (const auto &CO : CV->operands())
- if (memAddrUsesCTR(TM, CO))
- return true;
-
- return false;
- }
-
- if (!GV->isThreadLocal())
- return false;
- TLSModel::Model Model = TM.getTLSModel(GV);
- return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
-}
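For illustration (not part of the patch): under the general-dynamic or local-dynamic TLS models on ELF PowerPC, merely touching a thread-local variable lowers to a call to __tls_get_addr, which is exactly the hidden call this predicate detects. A minimal C++ sketch, with a hypothetical variable name:

// Hedged illustration: when compiled with -fPIC for ELF PowerPC, accesses
// to `counter` typically use the general-dynamic TLS model, and each access
// may call __tls_get_addr -- the hidden call memAddrUsesCTR guards against.
thread_local int counter;          // hypothetical thread-local
int bump() { return ++counter; }   // looks call-free, but may not be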
-
-// Loop through the inline asm constraints and look for something that clobbers
-// ctr.
-static bool asmClobbersCTR(InlineAsm *IA) {
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
- InlineAsm::ConstraintInfo &C = CIV[i];
- if (C.Type != InlineAsm::isInput)
- for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
- if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
- return true;
- }
- return false;
-}
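A hedged example (assumes GCC/Clang extended asm targeting PowerPC; not part of the patch) of the kind of inline asm this scan flags — the "ctr" clobber is parsed into a "{ctr}" constraint code, which the loop above matches case-insensitively via equals_lower:

// The clobber list names the count register, so a surrounding loop must not
// be converted to a CTR loop.
void delay_loop(unsigned n) {
  asm volatile("mtctr %0\n\t"
               "1: bdnz 1b"
               : /* no outputs */
               : "r"(n)
               : "ctr");
}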
-
-bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
- for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
- J != JE; ++J) {
- if (CallInst *CI = dyn_cast<CallInst>(J)) {
- // Inline ASM is okay, unless it clobbers the ctr register.
- if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
- if (asmClobbersCTR(IA))
- return true;
- continue;
- }
-
- if (Function *F = CI->getCalledFunction()) {
- // Most intrinsics don't become function calls, but some might.
- // sin, cos, exp and log are always calls.
- unsigned Opcode = 0;
- if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
- switch (F->getIntrinsicID()) {
- default: continue;
- // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
- // we're definitely using CTR.
- case Intrinsic::ppc_is_decremented_ctr_nonzero:
- case Intrinsic::ppc_mtctr:
- return true;
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::longjmp:
-
- // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
- // because, although it does clobber the counter register, the
- // control can't then return to inside the loop unless there is also
- // an eh_sjlj_setjmp.
- case Intrinsic::eh_sjlj_setjmp:
-
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- case Intrinsic::powi:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::sin:
- case Intrinsic::cos:
- return true;
- case Intrinsic::copysign:
- if (CI->getArgOperand(0)->getType()->getScalarType()->
- isPPC_FP128Ty())
- return true;
- else
- continue; // ISD::FCOPYSIGN is never a library call.
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
- case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
- case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
- case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
- }
- }
-
- // PowerPC does not use [US]DIVREM or other library calls for
- // operations on regular types which are not otherwise library calls
- // (i.e. soft float or atomics). If adapting for targets that do,
- // additional care is required here.
-
- LibFunc Func;
- if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
- LibInfo->getLibFunc(F->getName(), Func) &&
- LibInfo->hasOptimizedCodeGen(Func)) {
- // Non-read-only functions are never treated as intrinsics.
- if (!CI->onlyReadsMemory())
- return true;
-
- // Conversion happens only for FP calls.
- if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
- return true;
-
- switch (Func) {
- default: return true;
- case LibFunc_copysign:
- case LibFunc_copysignf:
- continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc_copysignl:
- return true;
- case LibFunc_fabs:
- case LibFunc_fabsf:
- case LibFunc_fabsl:
- continue; // ISD::FABS is never a library call.
- case LibFunc_sqrt:
- case LibFunc_sqrtf:
- case LibFunc_sqrtl:
- Opcode = ISD::FSQRT; break;
- case LibFunc_floor:
- case LibFunc_floorf:
- case LibFunc_floorl:
- Opcode = ISD::FFLOOR; break;
- case LibFunc_nearbyint:
- case LibFunc_nearbyintf:
- case LibFunc_nearbyintl:
- Opcode = ISD::FNEARBYINT; break;
- case LibFunc_ceil:
- case LibFunc_ceilf:
- case LibFunc_ceill:
- Opcode = ISD::FCEIL; break;
- case LibFunc_rint:
- case LibFunc_rintf:
- case LibFunc_rintl:
- Opcode = ISD::FRINT; break;
- case LibFunc_round:
- case LibFunc_roundf:
- case LibFunc_roundl:
- Opcode = ISD::FROUND; break;
- case LibFunc_trunc:
- case LibFunc_truncf:
- case LibFunc_truncl:
- Opcode = ISD::FTRUNC; break;
- case LibFunc_fmin:
- case LibFunc_fminf:
- case LibFunc_fminl:
- Opcode = ISD::FMINNUM; break;
- case LibFunc_fmax:
- case LibFunc_fmaxf:
- case LibFunc_fmaxl:
- Opcode = ISD::FMAXNUM; break;
- }
- }
-
- if (Opcode) {
- EVT EVTy =
- TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
-
- if (EVTy == MVT::Other)
- return true;
-
- if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
- continue;
- else if (EVTy.isVector() &&
- TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
- continue;
-
- return true;
- }
- }
-
- return true;
- } else if (isa<BinaryOperator>(J) &&
- J->getType()->getScalarType()->isPPC_FP128Ty()) {
- // Most operations on ppc_f128 values become calls.
- return true;
- } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
- isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
- CastInst *CI = cast<CastInst>(J);
- if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
- CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
- isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
- return true;
- } else if (isLargeIntegerTy(!TM->isPPC64(),
- J->getType()->getScalarType()) &&
- (J->getOpcode() == Instruction::UDiv ||
- J->getOpcode() == Instruction::SDiv ||
- J->getOpcode() == Instruction::URem ||
- J->getOpcode() == Instruction::SRem)) {
- return true;
- } else if (!TM->isPPC64() &&
- isLargeIntegerTy(false, J->getType()->getScalarType()) &&
- (J->getOpcode() == Instruction::Shl ||
- J->getOpcode() == Instruction::AShr ||
- J->getOpcode() == Instruction::LShr)) {
- // Only on PPC32, for 128-bit integers (specifically not 64-bit
- // integers), these might be runtime calls.
- return true;
- } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
- // On PowerPC, indirect jumps use the counter register.
- return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
- return true;
- }
-
- // FREM is always a call.
- if (J->getOpcode() == Instruction::FRem)
- return true;
-
- if (STI->useSoftFloat()) {
- switch(J->getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FCmp:
- return true;
- }
- }
-
- for (Value *Operand : J->operands())
- if (memAddrUsesCTR(*TM, Operand))
- return true;
- }
-
- return false;
-}
-bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
- bool MadeChange = false;
-
- // Do not convert small short loops to CTR loop.
- unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
- if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
- SmallPtrSet<const Value *, 32> EphValues;
- auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
- CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
- CodeMetrics Metrics;
- for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
- // 6 is an approximate latency for the mtctr instruction.
- if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
- return false;
- }
-
- // Process nested loops first.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
- MadeChange |= convertToCTRLoop(*I);
- LLVM_DEBUG(dbgs() << "Nested loop converted\n");
- }
-
- // If a nested loop has been converted, then we can't convert this loop.
- if (MadeChange)
- return MadeChange;
-
- // Bail out if the loop has irreducible control flow.
- LoopBlocksRPO RPOT(L);
- RPOT.perform(LI);
- if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
- return false;
-
-#ifndef NDEBUG
- // Stop trying after reaching the limit (if any).
- int Limit = CTRLoopLimit;
- if (Limit >= 0) {
- if (Counter >= CTRLoopLimit)
- return false;
- Counter++;
- }
-#endif
-
- // We don't want to spill/restore the counter register, and so we don't
- // want to use the counter register if the loop contains calls.
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I)
- if (mightUseCTR(*I))
- return MadeChange;
-
- SmallVector<BasicBlock*, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
-
- // If there is an exit edge known to be frequently taken,
- // we should not transform this loop.
- for (auto &BB : ExitingBlocks) {
- Instruction *TI = BB->getTerminator();
- if (!TI) continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- uint64_t TrueWeight = 0, FalseWeight = 0;
- if (!BI->isConditional() ||
- !BI->extractProfMetadata(TrueWeight, FalseWeight))
- continue;
-
- // If the exit path is more frequent than the loop path,
- // we return here without further analysis for this loop.
- bool TrueIsExit = !L->contains(BI->getSuccessor(0));
- if (( TrueIsExit && FalseWeight < TrueWeight) ||
- (!TrueIsExit && FalseWeight > TrueWeight))
- return MadeChange;
- }
- }
-
- BasicBlock *CountedExitBlock = nullptr;
- const SCEV *ExitCount = nullptr;
- BranchInst *CountedExitBranch = nullptr;
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- IE = ExitingBlocks.end(); I != IE; ++I) {
- const SCEV *EC = SE->getExitCount(L, *I);
- LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
- << (*I)->getName() << ": " << *EC << "\n");
- if (isa<SCEVCouldNotCompute>(EC))
- continue;
- if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
- if (ConstEC->getValue()->isZero())
- continue;
- } else if (!SE->isLoopInvariant(EC, L))
- continue;
-
- if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
- continue;
-
- // If this exiting block is contained in a nested loop, it is not eligible
- // for insertion of the branch-and-decrement since the inner loop would
- // end up messing up the value in the CTR.
- if (LI->getLoopFor(*I) != L)
- continue;
-
- // We now have a loop-invariant count of loop iterations (which is not the
- // constant zero) for which we know that this loop will not exit via this
- // exiting block.
-
- // We need to make sure that this block will run on every loop iteration.
- // For this to be true, we must dominate all blocks with backedges. Such
- // blocks are in-loop predecessors to the header block.
- bool NotAlways = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
- if (!L->contains(*PI))
- continue;
-
- if (!DT->dominates(*I, *PI)) {
- NotAlways = true;
- break;
- }
- }
-
- if (NotAlways)
- continue;
-
- // Make sure this block ends with a conditional branch.
- Instruction *TI = (*I)->getTerminator();
- if (!TI)
- continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (!BI->isConditional())
- continue;
-
- CountedExitBranch = BI;
- } else
- continue;
-
- // Note that this block may not be the loop latch block, even if the loop
- // has a latch block.
- CountedExitBlock = *I;
- ExitCount = EC;
- break;
- }
-
- if (!CountedExitBlock)
- return MadeChange;
-
- BasicBlock *Preheader = L->getLoopPreheader();
-
- // If we don't have a preheader, then insert one. If we already have a
- // preheader, then we can use it (except if the preheader contains a use of
- // the CTR register because some such uses might be reordered by the
- // selection DAG after the mtctr instruction).
- if (!Preheader || mightUseCTR(Preheader))
- Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
- if (!Preheader)
- return MadeChange;
-
- LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
- << "\n");
-
- // Insert the count into the preheader and replace the condition used by the
- // selected branch.
- MadeChange = true;
-
- SCEVExpander SCEVE(*SE, *DL, "loopcnt");
- LLVMContext &C = SE->getContext();
- Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
- if (!ExitCount->getType()->isPointerTy() &&
- ExitCount->getType() != CountType)
- ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
- ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
- Value *ECValue =
- SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
-
- IRBuilder<> CountBuilder(Preheader->getTerminator());
- Module *M = Preheader->getParent()->getParent();
- Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
- CountType);
- CountBuilder.CreateCall(MTCTRFunc, ECValue);
-
- IRBuilder<> CondBuilder(CountedExitBranch);
- Value *DecFunc =
- Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
- Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
- Value *OldCond = CountedExitBranch->getCondition();
- CountedExitBranch->setCondition(NewCond);
-
- // The false branch must exit the loop.
- if (!L->contains(CountedExitBranch->getSuccessor(0)))
- CountedExitBranch->swapSuccessors();
-
- // The old condition may be dead now, and may have even created a dead PHI
- // (the original induction variable).
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- // Run through the basic blocks of the loop and see if any of them have dead
- // PHIs that can be removed.
- for (auto I : L->blocks())
- DeleteDeadPHIs(I);
-
- ++NumCTRLoops;
- return MadeChange;
-}
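To make the trip-count arithmetic concrete, here is a standalone model (plain C++, not the LLVM API) of the loop shape this pass emitted: SCEV's exit count is the backedge-taken count, so after the pass adds one and materializes mtctr in the preheader, the body runs ExitCount + 1 times and the latch becomes a decrement-and-test:

#include <cassert>

// Standalone sketch of the emitted control flow; names are illustrative.
unsigned runCTRLoop(unsigned ExitCount) {
  unsigned CTR = ExitCount + 1;  // mtctr in the preheader
  unsigned Iterations = 0;
  do {
    ++Iterations;                // loop body
  } while (--CTR != 0);          // ppc_is_decremented_ctr_nonzero
  return Iterations;
}

int main() {
  assert(runCTRLoop(0) == 1);    // zero backedges taken: one iteration
  assert(runCTRLoop(9) == 10);
}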
-
#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
new file mode 100644
index 000000000000..77cdf5c939dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -0,0 +1,162 @@
+//===-- PPCCallingConv.cpp - ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterInfo.h"
+#include "PPCCallingConv.h"
+#include "PPCSubtarget.h"
+#include "PPCCCState.h"
+using namespace llvm;
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ llvm_unreachable("The AnyReg calling convention is only supported by the " \
+ "stackmap and patchpoint intrinsics.");
+ // Gracefully fall back to the PPC C calling convention in Release builds.
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
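A standalone sketch of the parity rule above (hypothetical helper, not the LLVM API): index i in ArgRegs corresponds to R(3+i), so a split 64-bit argument must start at an even index to land in an R(odd)/R(even) pair, and an odd first-unallocated index burns one register:

#include <cassert>

// Illustrative model of CC_PPC32_SVR4_Custom_AlignArgRegs' skip decision.
unsigned alignForSplitArg(unsigned FirstUnallocated, unsigned NumArgRegs) {
  if (FirstUnallocated != NumArgRegs && FirstUnallocated % 2 == 1)
    ++FirstUnallocated;  // allocate one register just to restore pairing
  return FirstUnallocated;
}

int main() {
  assert(alignForSplitArg(0, 8) == 0);  // pair starts at R3/R4
  assert(alignForSplitArg(1, 8) == 2);  // R4 is burned; pair moves to R5/R6
  assert(alignForSplitArg(8, 8) == 8);  // no registers left, nothing to do
}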
+
+static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+ int RegsLeft = NumArgRegs - RegNum;
+
+ // Skip if there are not enough registers left for the long double type (4 GPRs
+ // in soft-float mode) and put the long double argument on the stack.
+ if (RegNum != NumArgRegs && RegsLeft < 4) {
+ for (int i = 0; i < RegsLeft; i++) {
+ State.AllocateReg(ArgRegs[RegNum + i]);
+ }
+ }
+
+ return false;
+}
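The same kind of standalone sketch for the rule above (hypothetical helper, not the LLVM API): a soft-float ppc_fp128 needs four GPRs, and if fewer remain they are all burned so the value is never split between registers and the stack:

#include <cassert>

// Illustrative model of the skip decision for soft-float long double.
unsigned skipTailRegsForPPCF128(unsigned FirstUnallocated, unsigned NumArgRegs) {
  int RegsLeft = int(NumArgRegs) - int(FirstUnallocated);
  if (FirstUnallocated != NumArgRegs && RegsLeft < 4)
    return NumArgRegs;      // burn the stragglers; the value goes on the stack
  return FirstUnallocated;  // four or more left: allocation proceeds normally
}

int main() {
  assert(skipTailRegsForPPCF128(4, 8) == 4);  // exactly four left: fits
  assert(skipTailRegsForPPCF128(6, 8) == 8);  // two left: both burned
}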
+
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // If there is only one floating-point register left, we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+// Split F64 arguments into two 32-bit consecutive registers.
+static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
+ static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
+
+ // Try to get the first register.
+ unsigned Reg = State.AllocateReg(HiRegList);
+ if (!Reg)
+ return false;
+
+ unsigned i;
+ for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
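A bit-level sketch (plain C++, not the LLVM API) of what the (Hi, Lo) pair carries: the two 32-bit halves of the f64, with the high word in the lower-numbered register on big-endian PPC32:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double d = 1.5;                      // bit pattern 0x3FF8000000000000
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  uint32_t hi = uint32_t(bits >> 32);  // Hi register of the pair (e.g. R3)
  uint32_t lo = uint32_t(bits);        // Lo register of the pair (e.g. R4)
  std::printf("hi=0x%08X lo=0x%08X\n", hi, lo);  // hi=0x3FF80000 lo=0x00000000
}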
+
+// Same as above, but for return values, so we only allocate R3 and R4.
+static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg HiRegList[] = { PPC::R3 };
+ static const MCPhysReg LoRegList[] = { PPC::R4 };
+
+ // Try to get the first register.
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+ if (!Reg)
+ return false;
+
+ unsigned i;
+ for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+#include "PPCGenCallingConv.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
index eb904a858592..03d9be0a73d9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
@@ -1,9 +1,8 @@
//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,14 +19,27 @@
namespace llvm {
-inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
- CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
- CCState &) {
- llvm_unreachable("The AnyReg calling convention is only supported by the " \
- "stackmap and patchpoint intrinsics.");
- // gracefully fallback to PPC C calling convention on Release builds.
- return false;
-}
+bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 22842d516e7d..369b9ce1a711 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,9 +1,8 @@
//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,7 @@ def RetCC_PPC64_AnyReg : CallingConv<[
]>;
// Return-value convention for PowerPC coldcc.
+let Entry = 1 in
def RetCC_PPC_Cold : CallingConv<[
// Use the same return registers as RetCC_PPC, but limited to only
// one return value. The remaining return values will be saved to
@@ -70,6 +70,7 @@ def RetCC_PPC_Cold : CallingConv<[
]>;
// Return-value convention for PowerPC
+let Entry = 1 in
def RetCC_PPC : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
@@ -90,7 +91,7 @@ def RetCC_PPC : CallingConv<[
CCIfSubtarget<"hasSPE()",
CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
CCIfSubtarget<"hasSPE()",
- CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+ CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>,
// For P9, f128 are passed in vector registers.
CCIfType<[f128],
@@ -126,6 +127,7 @@ def CC_PPC64_AnyReg : CallingConv<[
// Simple calling convention for 64-bit ELF PowerPC fast isel.
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
+let Entry = 1 in
def CC_PPC64_ELF_FIS : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
@@ -141,6 +143,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[
// All small ints are promoted to i64. Vector types, quadword ints,
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
+let Entry = 1 in
def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
@@ -179,6 +182,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[i32],
CCIfSplit<CCIfNotSubtarget<"useSoftFloat()",
CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>,
+ CCIfType<[f64],
+ CCIfSubtarget<"hasSPE()",
+ CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
CCIfSplit<CCIfSubtarget<"useSoftFloat()",
CCIfOrigArgWasPPCF128<CCCustom<
"CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>,
@@ -199,7 +205,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
CCIfType<[f64],
CCIfSubtarget<"hasSPE()",
- CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+ CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>,
CCIfType<[f32],
CCIfSubtarget<"hasSPE()",
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
@@ -228,12 +234,14 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
+let Entry = 1 in
def CC_PPC32_SVR4_VarArg : CallingConv<[
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
+let Entry = 1 in
def CC_PPC32_SVR4 : CallingConv<[
// QPX vectors mirror the scalar FP convention.
CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
@@ -265,6 +273,7 @@ def CC_PPC32_SVR4 : CallingConv<[
// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
+let Entry = 1 in
def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
@@ -300,6 +309,13 @@ def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
+def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
@@ -316,6 +332,13 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
+def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
// CSRs that are handled by prologue, epilogue.
def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
@@ -343,15 +366,22 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
// Do not include return registers r3, f1, v2.
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
- (sequence "R%u", 14, 31),
- F0, (sequence "F%u", 2, 31),
- (sequence "CR%u", 0, 7))>;
+def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
+ (sequence "R%u", 14, 31),
+ (sequence "CR%u", 0, 7))>;
+
+def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+ F0, (sequence "F%u", 2, 31))>;
+
def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC,
(sequence "V%u", 0, 1),
(sequence "V%u", 3, 31))>;
+def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+ (sequence "S%u", 4, 10),
+ (sequence "S%u", 14, 31))>;
+
def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10),
(sequence "X%u", 14, 31),
F0, (sequence "F%u", 2, 31),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ac931f7d0ec0..aa5d830b549e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -1,9 +1,8 @@
//===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ using namespace llvm;
STATISTIC(NumBCLR, "Number of early conditional returns");
STATISTIC(NumBLR, "Number of early returns");
-namespace llvm {
- void initializePPCEarlyReturnPass(PassRegistry&);
-}
-
namespace {
// PPCEarlyReturn pass - For simple functions without epilogue code, move
// returns up, and create conditional returns, to avoid unnecessary
@@ -184,11 +179,11 @@ public:
// nothing to do.
if (MF.size() < 2)
return Changed;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+
+ // We can't use a range-based for loop due to clobbering the iterator.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) {
MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
+ Changed |= processBlock(B);
}
return Changed;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index a03e691ef5bb..e8ef451c7ec9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -1,9 +1,8 @@
//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 3b2d92db78b9..264d6b590f95 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1,9 +1,8 @@
//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -152,6 +151,14 @@ class PPCFastISel final : public FastISel {
bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
return RC->getID() == PPC::VSSRCRegClassID;
}
+ unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
+ unsigned SrcReg, unsigned Flag = 0,
+ unsigned SubReg = 0) {
+ unsigned TmpReg = createResultReg(ToRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
+ return TmpReg;
+ }
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg,
const PPC::Predicate Pred);
@@ -187,7 +194,6 @@ class PPCFastISel final : public FastISel {
unsigned &NumBytes,
bool IsVarArg);
bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
- LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
private:
#include "PPCGenFastISel.inc"
@@ -196,23 +202,6 @@ class PPCFastISel final : public FastISel {
} // end anonymous namespace
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
- if (Flag == 1)
- return CC_PPC32_SVR4;
- else if (Flag == 2)
- return CC_PPC32_SVR4_ByVal;
- else if (Flag == 3)
- return CC_PPC32_SVR4_VarArg;
- else if (Flag == 4)
- return RetCC_PPC_Cold;
- else
- return RetCC_PPC;
-}
-
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
switch (Pred) {
// These are not representable with any single compare.
@@ -874,7 +863,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
unsigned CmpOpc;
bool NeedsExt = false;
- auto RC = MRI.getRegClass(SrcReg1);
+
+ auto RC1 = MRI.getRegClass(SrcReg1);
+ auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
+
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
@@ -893,12 +885,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
} else {
CmpOpc = PPC::FCMPUS;
- if (isVSSRCRegClass(RC)) {
- unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
- SrcReg1 = TmpReg;
- }
+ if (isVSSRCRegClass(RC1))
+ SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
+ if (RC2 && isVSSRCRegClass(RC2))
+ SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
}
break;
case MVT::f64:
@@ -915,7 +905,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFDCMPGT;
break;
}
- } else if (isVSFRCRegClass(RC)) {
+ } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
CmpOpc = PPC::XSCMPUDP;
} else {
CmpOpc = PPC::FCMPUD;
@@ -997,12 +987,17 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg;
-
+ auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::SPE4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
.addReg(SrcReg);
+ } else if (isVSFRCRegClass(RC)) {
+ DestReg = createResultReg(&PPC::VSSRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::XSRSP), DestReg)
+ .addReg(SrcReg);
} else {
DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1217,21 +1212,19 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (SrcReg == 0)
return false;
- // Convert f32 to f64 if necessary. This is just a meaningless copy
- // to get the register class right.
+ // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
+ // meaningless copy to get the register class right.
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
- if (InRC == &PPC::F4RCRegClass) {
- unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), TmpReg)
- .addReg(SrcReg);
- SrcReg = TmpReg;
- }
+ if (InRC == &PPC::F4RCRegClass)
+ SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
+ else if (InRC == &PPC::VSSRCRegClass)
+ SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
// Determine the opcode for the conversion, which takes place
- // entirely within FPRs.
+ // entirely within FPRs or VSRs.
unsigned DestReg;
unsigned Opc;
+ auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
@@ -1239,6 +1232,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
else
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+ } else if (isVSFRCRegClass(RC)) {
+ DestReg = createResultReg(&PPC::VSFRCRegClass);
+ if (DstVT == MVT::i32)
+ Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
+ else
+ Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
} else {
DestReg = createResultReg(&PPC::F8RCRegClass);
if (DstVT == MVT::i32)
@@ -1520,11 +1519,7 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte
if (RetVT == CopyVT) {
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
- ResultReg = createResultReg(CpyRC);
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(SourcePhysReg);
+ ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
@@ -1537,12 +1532,9 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte
// used along the fast-isel path (not lowered), and downstream logic
// also doesn't like a direct subreg copy on a physical reg.)
} else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
- ResultReg = createResultReg(&PPC::GPRCRegClass);
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(SourcePhysReg);
+ ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
}
assert(ResultReg && "ResultReg unset!");
@@ -1894,13 +1886,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
return false;
// The only interesting case is when we need to switch register classes.
- if (SrcVT == MVT::i64) {
- unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(SrcReg, 0, PPC::sub_32);
- SrcReg = ResultReg;
- }
+ if (SrcVT == MVT::i64)
+ SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
updateValueMap(I, SrcReg);
return true;
@@ -1977,6 +1964,13 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
+ // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ if (TM.getTargetTriple().isOSAIX())
+ break;
return selectCall(I);
case Instruction::Ret:
return SelectRet(I);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 8263954994d2..ebfb1ef7f49b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,7 +29,6 @@
using namespace llvm;
#define DEBUG_TYPE "framelowering"
-STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
@@ -73,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
}
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
- if (STI.isDarwinABI() || STI.isPPC64())
+ if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
- // SVR4 ABI:
+ // 32-bit SVR4 ABI:
return 8;
}
@@ -446,12 +444,27 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
+/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
+/// call frame size. Update the MachineFunction object with the stack size.
+unsigned
+PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate) const {
+ unsigned NewMaxCallFrameSize = 0;
+ unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+ &NewMaxCallFrameSize);
+ MF.getFrameInfo().setStackSize(FrameSize);
+ MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
+ return FrameSize;
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
- bool UpdateMF,
- bool UseEstimate) const {
- MachineFrameInfo &MFI = MF.getFrameInfo();
+unsigned
+PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate,
+ unsigned *NewMaxCallFrameSize) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
@@ -469,6 +482,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
!MustSaveLR(MF, LR) && // No need to save LR.
+ !FI->mustSaveTOC() && // No need to save TOC.
!RegInfo->hasBasePointer(MF); // No special alignment.
// Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
@@ -477,10 +491,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
- NumNoNeedForFrame++;
// No need for frame
- if (UpdateMF)
- MFI.setStackSize(0);
return 0;
}
@@ -496,9 +507,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
if (MFI.hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
- // Update maximum call frame size.
- if (UpdateMF)
- MFI.setMaxCallFrameSize(maxCallFrameSize);
+ // Update the new max call frame size if the caller passes in a valid pointer.
+ if (NewMaxCallFrameSize)
+ *NewMaxCallFrameSize = maxCallFrameSize;
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -506,10 +517,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Make sure the frame is aligned.
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
- // Update frame info.
- if (UpdateMF)
- MFI.setStackSize(FrameSize);
-
return FrameSize;
}
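The align-up idiom used above, shown in isolation (a sketch; assumes Align is a power of two):

#include <cassert>

// Round Size up to the next multiple of Align; AlignMask = Align - 1,
// matching the (x + AlignMask) & ~AlignMask pattern in determineFrameLayout.
unsigned alignTo(unsigned Size, unsigned Align) {
  unsigned AlignMask = Align - 1;
  return (Size + AlignMask) & ~AlignMask;
}

int main() {
  assert(alignTo(52, 16) == 64);
  assert(alignTo(64, 16) == 64);  // already aligned: unchanged
}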
@@ -690,7 +697,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned FrameSize = determineFrameLayout(MF, false);
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -713,6 +720,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
return findScratchRegister(TmpMBB, true);
}
+bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ // Abort if there is no register info or function info.
+ if (!RegInfo || !FI)
+ return false;
+
+ // Only move the stack update on ELFv2 ABI and PPC64.
+ if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
+ return false;
+
+ // Check the frame size first and return false if it does not fit the
+ // requirements.
+ // We need a non-zero frame size as well as a frame that will fit in the red
+ // zone. This is because, once the stack pointer update is moved down, the
+ // callee saves store into the red zone until the stack pointer is actually
+ // updated. If an interrupt arrives inside the prologue before the stack
+ // update, all of those red-zone stores must fit.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned FrameSize = MFI.getStackSize();
+ if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
+ return false;
+
+ // Frame pointers and base pointers complicate matters, so don't do anything
+ // if we have them. For example, having a frame pointer will sometimes require
+ // a copy of r1 into r31 and that makes keeping track of updates to r1 more
+ // difficult.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ return false;
+
+ // Calls to fast_cc functions use different rules than the ABI for passing
+ // parameters on the stack, and using a PIC base in the function imposes
+ // restrictions similar to using the base pointer. It is not generally safe
+ // to move the stack pointer update in these situations.
+ if (FI->hasFastCall() || FI->usesPICBase())
+ return false;
+
+ // Finally we can move the stack update if we do not require register
+ // scavenging. Register scavenging can introduce more spills and so
+ // may make the frame size larger than we have computed.
+ return !RegInfo->requiresFrameIndexScavenging(MF);
+}
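The frame-size gate in isolation (a standalone sketch, not the LLVM API; the 288-byte default is the 64-bit ELF red-zone size and is an assumption of the example):

#include <cassert>

// With the stack pointer update delayed, callee saves land in the red zone,
// so the whole frame must be non-empty and fit there.
bool frameFitsRedZone(unsigned FrameSize, unsigned RedZoneSize = 288) {
  return FrameSize != 0 && FrameSize <= RedZoneSize;
}

int main() {
  assert(!frameFitsRedZone(0));    // no frame: nothing to gain by moving
  assert(frameFitsRedZone(224));
  assert(!frameFitsRedZone(512));  // too large: stores would miss the zone
}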
+
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -748,7 +799,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = MBB.begin();
// Work out frame sizes.
- unsigned FrameSize = determineFrameLayout(MF);
+ unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
int NegFrameSize = -FrameSize;
if (!isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
@@ -759,6 +810,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
+ bool MustSaveTOC = FI->mustSaveTOC();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
@@ -770,6 +822,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
@@ -855,6 +908,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ // Check if we can move the stack update instruction (stdu) down the prologue
+ // past the callee saves. Hopefully this avoids the situation where the
+ // saves must wait for the store-with-update that adjusts the stack pointer
+ // to complete.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateDown = false;
+
+ // Check if we can move the stack update.
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the stack pointer update past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ StackUpdateLoc++;
+ MovingStackUpdateDown = true;
+ } else {
+ // We need all of the Frame Indices to meet these conditions.
+ // If they do not, abort the whole operation.
+ StackUpdateLoc = MBBI;
+ MovingStackUpdateDown = false;
+ break;
+ }
+ }
+
+ // If the operation was not aborted then update the object offset.
+ if (MovingStackUpdateDown) {
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ if (FrIdx < 0)
+ MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+ }
+ }
+ }
+
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -918,7 +1010,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, StoreInst)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
@@ -986,7 +1078,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;
} else if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
@@ -1004,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;
}
+ // Save the TOC register after the stack pointer update if a prologue TOC
+ // save is required for the function.
+ if (MustSaveTOC) {
+ assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+ BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+ .addReg(TOCReg, getKillRegState(true))
+ .addImm(TOCSaveOffset)
+ .addReg(SPReg);
+ }
+
if (!HasRedZone) {
assert(!isPPC64 && "A red zone is always available on PPC64");
if (HasSTUX) {
@@ -1205,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (PPC::CRBITRCRegClass.contains(Reg))
continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
@@ -1234,6 +1339,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIRegister);
} else {
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1380,6 +1491,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RBReg = SPReg;
unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This allows the move-to-LR instruction to execute
+ // before the callee-saved register restores, which means those restores
+ // can hide the latency of the MTLR instruction.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the update of the stack pointer
+ // past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+ StackUpdateLoc--;
+ else {
+ // Abort the operation as we can't update all CSR restores.
+ StackUpdateLoc = MBBI;
+ break;
+ }
+ }
+ }
+
if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1409,7 +1546,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
@@ -1433,7 +1570,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FPReg);
RBReg = FPReg;
}
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
@@ -1466,7 +1603,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
- BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
@@ -1538,7 +1675,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+ BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
@@ -1732,6 +1869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
+ (Reg != PPC::X2 && Reg != PPC::R2)) &&
+ "Not expecting to try to spill R2 in a function that must save TOC");
if (PPC::GPRCRegClass.contains(Reg) ||
PPC::SPE4RCRegClass.contains(Reg)) {
HasGPSaveArea = true;
@@ -1947,7 +2087,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// the 16-bit immediate. We don't know the complete frame size here
// because we've not yet computed callee-saved register spills or the
// needed alignment padding.
- unsigned StackSize = determineFrameLayout(MF, false, true);
+ unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
@@ -2041,6 +2181,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ bool MustSaveTOC = FI->mustSaveTOC();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -2071,6 +2213,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
}
+ // The actual spill will happen in the prologue.
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
// Insert the spill to the stack frame.
if (IsCRField) {
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
@@ -2198,6 +2344,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ bool MustSaveTOC = FI->mustSaveTOC();
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
@@ -2220,6 +2368,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 69bd1484d6e5..d116e9fd22e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,9 +1,8 @@
//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
-#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -73,12 +71,29 @@ class PPCFrameLowering: public TargetFrameLowering {
*/
void createTailCallBranchInstr(MachineBasicBlock &MBB) const;
+ /**
+ * Check if the conditions are correct to allow for the stack update
+ * to be moved past the CSR save/restore code.
+ */
+ bool stackUpdateCanBeMoved(MachineFunction &MF) const;
+
public:
PPCFrameLowering(const PPCSubtarget &STI);
- unsigned determineFrameLayout(MachineFunction &MF,
- bool UpdateMF = true,
- bool UseEstimate = false) const;
+ /**
+ * Determine the frame layout and update the machine function.
+ */
+ unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate = false) const;
+
+ /**
+ * Determine the frame layout but do not update the machine function.
+ * The MachineFunction object can be const in this case as it is not
+ * modified.
+ */
+ unsigned determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate = false,
+ unsigned *NewMaxCallFrameSize = nullptr) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
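As a usage sketch of the split interface (the driver function below is illustrative and not part of the patch; only the two member signatures come from it):

#include "PPCFrameLowering.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Illustrative caller: pick the entry point by whether the machine
// function should be mutated.
static void chooseLayoutEntryPoint(const PPCFrameLowering &FL,
                                   MachineFunction &MF) {
  // Prologue/epilogue emission: compute the layout and record it on MF.
  unsigned FrameSize = FL.determineFrameLayoutAndUpdate(MF);

  // Early heuristics (e.g. sizing the scavenging spill slot): a read-only
  // query against an estimated frame, leaving MF untouched.
  unsigned NewMaxCallFrameSize = 0;
  unsigned Estimate =
      FL.determineFrameLayout(MF, /*UseEstimate=*/true, &NewMaxCallFrameSize);
  (void)FrameSize;
  (void)Estimate;
}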
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 5f6966cecd61..391ebcc1a143 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -1,9 +1,8 @@
//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,8 @@
//===----------------------------------------------------------------------===//
#include "PPCHazardRecognizers.h"
-#include "PPC.h"
#include "PPCInstrInfo.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
index 4b502147ca63..5b32147ca88d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -1,9 +1,8 @@
//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 70e9049a2ab3..543cac075f55 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,13 +218,6 @@ namespace {
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl);
- /// SelectAddrImm - Returns true if the address N can be represented by
- /// a base register plus a signed 16-bit displacement [r+imm].
- bool SelectAddrImm(SDValue N, SDValue &Disp,
- SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
- }
-
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
/// result of a prior SelectAddressRegImm call.
@@ -239,26 +231,61 @@ namespace {
return false;
}
- /// SelectAddrIdx - Given the specified addressed, check to see if it can be
- /// represented as an indexed [r+r] operation. Returns false if it can
- /// be represented by [r+imm], which are preferred.
+ /// SelectAddrIdx - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is D.
+ /// The last parameter \p 0 means the associated D form has no alignment
+ /// requirement for the 16-bit signed displacement.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
+ }
+
+ /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is DS.
+ /// The last parameter \p 4 means the associated DS form's 16-bit signed
+ /// displacement must be a multiple of 4.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
+ bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+ }
+
+ /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is DQ.
+ /// The last parameter \p 16 means the associated DQ form's 16-bit signed
+ /// displacement must be a multiple of 16.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
+ bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
}
- /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+ /// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ /// The last parameter \p 0 means the D form has no alignment requirement
+ /// for the 16-bit signed displacement.
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+ }
/// SelectAddrImmX4 - Returns true if the address N can be represented by
- /// a base register plus a signed 16-bit displacement that is a multiple of 4.
- /// Suitable for use by STD and friends.
+ /// a base register plus a signed 16-bit displacement that is a multiple of
+ /// 4 (last parameter). Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
}
+ /// SelectAddrImmX16 - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement that is a multiple of
+ /// 16 (last parameter). Suitable for use by STXV and friends.
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
}
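To make the EncodingAlignment parameter concrete, here is a small self-contained distillation (a hypothetical helper, not from the patch) of the gate these selectors apply before preferring the indexed form:

#include <cstdint>

// A displacement is acceptable for the D/DS/DQ displacement form only if
// it honors that form's encoding alignment; an alignment of 0 means any
// signed 16-bit displacement is fine.
static bool fitsDisplacementForm(int16_t Imm, unsigned EncodingAlignment) {
  return !EncodingAlignment || (Imm % EncodingAlignment) == 0;
}

// fitsDisplacementForm(12, 4)  -> true  : DS form (e.g. STD) can encode 12
// fitsDisplacementForm(10, 4)  -> false : must fall back to the [r+r] x-form
// fitsDisplacementForm(48, 16) -> true  : DQ form (e.g. STXV) can encode 48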
@@ -412,7 +439,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
if (PPCSubTarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
- if (M->getPICLevel() == PICLevel::SmallPIC) {
+ if (!PPCSubTarget->isSecurePlt() &&
+ M->getPICLevel() == PICLevel::SmallPIC) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
@@ -2373,7 +2401,7 @@ public:
// Here we try to match complex bit permutations into a set of
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
- // known to produce optimial code for common cases (like i32 byte swapping).
+ // known to produce optimal code for common cases (like i32 byte swapping).
SDNode *Select(SDNode *N) {
Memoizer.clear();
auto Result =
@@ -4214,12 +4242,12 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
// Without this setb optimization, the outer SELECT_CC will be manually
// selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
- // transforms pseduo instruction to isel instruction. When there are more than
+ // transforms pseudo instruction to isel instruction. When there are more than
// one use for result like zext/sext, with current optimization we only see
// isel is replaced by setb but can't see any significant gain. Since
// setb has longer latency than original isel, we should avoid this. Another
// point is that setb requires comparison always kept, it can break the
- // oppotunity to get the comparison away if we have in future.
+ // opportunity to get the comparison away if we have in future.
if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
return false;
@@ -4354,13 +4382,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (trySETCC(N))
return;
break;
-
+ // These nodes will be transformed into GETtlsADDR32 nodes, which
+ // later become BL_TLS __tls_get_addr(sym@tlsgd)@PLT.
+ case PPCISD::ADDI_TLSLD_L_ADDR:
+ case PPCISD::ADDI_TLSGD_L_ADDR: {
+ const Module *Mod = MF->getFunction().getParent();
+ if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
+ !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
+ Mod->getPICLevel() == PICLevel::SmallPIC)
+ break;
+ // Attach global base pointer on GETtlsADDR32 node in order to
+ // generate secure plt code for TLS symbols.
+ getGlobalBaseReg();
+ } break;
case PPCISD::CALL: {
- const Module *M = MF->getFunction().getParent();
-
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) ||
- !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC)
+ !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
+ !PPCSubTarget->isTargetELF())
break;
SDValue Op = N->getOperand(1);
@@ -5305,7 +5343,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
SDValue V = Queue.pop_back_val();
for (const SDValue &O : V.getNode()->ops()) {
- unsigned b;
+ unsigned b = 0;
uint64_t M = 0, A = 0;
SDValue OLHS, ORHS;
if (O.getOpcode() == ISD::OR) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 39608cb74bee..24d50074860d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,6 +44,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -70,8 +70,10 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -111,6 +113,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
+static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
+cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
+
static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);
@@ -119,6 +124,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@@ -550,7 +557,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
- setOperationAction(ISD::ABS, VT, Custom);
+
+ // For v2i64, these are only valid with P8Vector. This is corrected after
+ // the loop.
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ }
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -635,11 +653,28 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ if (!Subtarget.hasP8Vector()) {
+ setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
+ }
+
+ for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
+ setOperationAction(ISD::ABS, VT, Custom);
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+ // Vector truncates to sub-word integer that fit in an Altivec/VSX register
+ // are cheap, so handle them before they get expanded to scalar.
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
@@ -804,6 +839,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
if (Subtarget.hasDirectMove())
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
@@ -866,6 +903,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FPOWI, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
}
@@ -1060,6 +1098,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -1232,22 +1271,6 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
return Align;
}
-unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
- CallingConv::ID CC,
- EVT VT) const {
- if (Subtarget.hasSPE() && VT == MVT::f64)
- return 2;
- return PPCTargetLowering::getNumRegisters(Context, VT);
-}
-
-MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
- CallingConv::ID CC,
- EVT VT) const {
- if (Subtarget.hasSPE() && VT == MVT::f64)
- return MVT::i32;
- return PPCTargetLowering::getRegisterType(Context, VT);
-}
-
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
@@ -1256,6 +1279,10 @@ bool PPCTargetLowering::hasSPE() const {
return Subtarget.hasSPE();
}
+bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+ return VT.isScalarInteger();
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1365,7 +1392,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
+ case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
+ case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
+ case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
+ case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
}
return nullptr;
}
@@ -2202,16 +2233,43 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
+
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+ if (Memop->getMemoryVT() == MVT::f64) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
-/// can be more efficiently represented with [r+imm].
+/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
+/// non-zero and N can be represented by a base register plus a signed 16-bit
+/// displacement, make a more precise judgement by checking (displacement % \p
+/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
- SDValue &Index,
- SelectionDAG &DAG) const {
+ SDValue &Index, SelectionDAG &DAG,
+ unsigned EncodingAlignment) const {
int16_t imm = 0;
if (N.getOpcode() == ISD::ADD) {
- if (isIntS16Immediate(N.getOperand(1), imm))
- return false; // r+i
+ // SPE f64 load/store cannot use a 16-bit offset; it only supports
+ // 8-bit offsets, so check for the EVX [r+r] form first.
+ if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+ return true;
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!EncodingAlignment || !(imm % EncodingAlignment)))
+ return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
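A worked illustration of the SPE special case (the numbers are hypothetical): for an f64 access at base+4000, the offset fits a signed 16-bit immediate, so the generic check alone would return false here and the displacement form would be chosen; but per the comment above the SPE encoding only carries an 8-bit offset. Checking SelectAddressEVXRegReg first lets the f64 access claim the indexed [r+r] form (an evlddx-style instruction), with the offset materialized into a register instead.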
@@ -2219,8 +2277,9 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
- if (isIntS16Immediate(N.getOperand(1), imm))
- return false; // r+i can fold it if we can.
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!EncodingAlignment || !(imm % EncodingAlignment)))
+ return false; // r+i can fold it if we can.
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
@@ -2284,22 +2343,22 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg. If \p Alignment is non-zero, only accept
+/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
- unsigned Alignment) const {
+ unsigned EncodingAlignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
- if (SelectAddressRegReg(N, Disp, Base, DAG))
+ if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
return false;
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Alignment || (imm % Alignment) == 0)) {
+ (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2323,7 +2382,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Alignment || (imm % Alignment) == 0)) {
+ (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -2349,7 +2408,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
int16_t Imm;
- if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
+ if (isIntS16Immediate(CN, Imm) &&
+ (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -2359,7 +2419,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
- (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
+ (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -2416,24 +2476,45 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
-static bool usePartialVectorLoads(SDNode *N) {
- if (!N->hasOneUse())
- return false;
+static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
// If there are any uses other than scalar to vector, then we should
// keep it as a scalar load -> direct move pattern to prevent multiple
- // loads. Currently, only check for i64 since we have lxsd/lfd to do this
- // efficiently, but no update equivalent.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- EVT MemVT = LD->getMemoryVT();
- if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
- SDNode *User = *(LD->use_begin());
- if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return true;
- }
+ // loads.
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD)
+ return false;
+
+ EVT MemVT = LD->getMemoryVT();
+ if (!MemVT.isSimple())
+ return false;
+ switch(MemVT.getSimpleVT().SimpleTy) {
+ case MVT::i64:
+ break;
+ case MVT::i32:
+ if (!ST.hasP8Vector())
+ return false;
+ break;
+ case MVT::i16:
+ case MVT::i8:
+ if (!ST.hasP9Vector())
+ return false;
+ break;
+ default:
+ return false;
}
- return false;
+ SDValue LoadedVal(N, 0);
+ if (!LoadedVal.hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().get().getResNo() == 0 &&
+ UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+
+ return true;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
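A note on usePartialVectorLoads above: the switch encodes which scalar memory widths have a direct load-into-vector instruction on the given subtarget, so the pre-increment form is only suppressed when such an instruction exists: i64 everywhere this path is reached (lxsd/lfd, as the comment says), i32 only with P8Vector, and i16/i8 only with P9Vector (presumably the lxsiwzx- and lxsibzx/lxsihzx-style loads; the patch itself does not name them). The rewritten use-walk requires the loaded value (result 0) to have exactly one use, and that use must be a SCALAR_TO_VECTOR.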
@@ -2464,7 +2545,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// Do not generate pre-inc forms for specific loads that feed scalar_to_vector
// instructions because we can fold these into a more efficient instruction
// instead, (such as LXSD).
- if (isLoad && usePartialVectorLoads(N)) {
+ if (isLoad && usePartialVectorLoads(N, Subtarget)) {
return false;
}
@@ -2745,7 +2826,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
const Module *M = DAG.getMachineFunction().getFunction().getParent();
PICLevel::Level picLevel = M->getPICLevel();
- TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+ const TargetMachine &TM = getTargetMachine();
+ TLSModel::Model Model = TM.getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -2769,8 +2851,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
- } else
- GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ } else {
+ if (!TM.isPositionIndependent())
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ else if (picLevel == PICLevel::SmallPIC)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
@@ -3147,101 +3235,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV, nextOffset));
}
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
- return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- return true;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const MCPhysReg ArgRegs[] = {
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
- unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
- // Skip one register if the first unallocated register has an even register
- // number and there are still argument registers available which have not been
- // allocated yet. RegNum is actually an index into ArgRegs, which means we
- // need to skip a register if RegNum is odd.
- if (RegNum != NumArgRegs && RegNum % 2 == 1) {
- State.AllocateReg(ArgRegs[RegNum]);
- }
-
- // Always return false here, as this function only makes sure that the first
- // unallocated register has an odd register number and does not actually
- // allocate a register for the current argument.
- return false;
-}
-
-bool
-llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const MCPhysReg ArgRegs[] = {
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
- unsigned RegNum = State.getFirstUnallocated(ArgRegs);
- int RegsLeft = NumArgRegs - RegNum;
-
- // Skip if there is not enough registers left for long double type (4 gpr regs
- // in soft float mode) and put long double argument on the stack.
- if (RegNum != NumArgRegs && RegsLeft < 4) {
- for (int i = 0; i < RegsLeft; i++) {
- State.AllocateReg(ArgRegs[RegNum + i]);
- }
- }
-
- return false;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const MCPhysReg ArgRegs[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8
- };
-
- const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
- unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
- // If there is only one Floating-point register left we need to put both f64
- // values of a split ppc_fp128 value on the stack.
- if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
- State.AllocateReg(ArgRegs[RegNum]);
- }
-
- // Always return false here, as this function only makes sure that the two f64
- // values a ppc_fp128 value is split into are both passed in registers or both
- // passed on the stack and does not actually allocate a register for the
- // current argument.
- return false;
-}
-
/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
@@ -3449,7 +3442,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
- if (useSoftFloat() || hasSPE())
+ if (useSoftFloat())
CCInfo.PreAnalyzeFormalArguments(Ins);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3482,7 +3475,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
else if (Subtarget.hasSPE())
- RC = &PPC::SPERCRegClass;
+ // SPE passes doubles in GPR pairs.
+ RC = &PPC::GPRCRegClass;
else
RC = &PPC::F8RCRegClass;
break;
@@ -3506,13 +3500,26 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
break;
}
- // Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
- ValVT == MVT::i1 ? MVT::i32 : ValVT);
-
- if (ValVT == MVT::i1)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+ SDValue ArgValue;
+ // Transform the arguments stored in physical registers into
+ // virtual ones.
+ if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
+ assert(i + 1 < e && "No second half of double precision argument");
+ unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
+ SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
+ SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
+ if (!Subtarget.isLittleEndian())
+ std::swap (ArgValueLo, ArgValueHi);
+ ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
+ ArgValueHi);
+ } else {
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+ }
InVals.push_back(ArgValue);
} else {
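The split above (mirrored at call and return sites with PPCISD::EXTRACT_SPE) treats an f64 as a pair of i32 halves whose register order depends on endianness. A minimal standalone sketch of the bit-level effect (the helper is hypothetical; only the swap logic mirrors the patch):

#include <cstdint>
#include <utility>

// Reassemble the f64 bit pattern from the two consecutively assigned
// GPRs, following the operand order fed to PPCISD::BUILD_SPE64 above.
static uint64_t rebuildF64Bits(uint32_t FirstGPR, uint32_t SecondGPR,
                               bool IsLittleEndian) {
  uint32_t Lo = FirstGPR, Hi = SecondGPR;
  if (!IsLittleEndian)
    std::swap(Lo, Hi); // big-endian: the first GPR holds the high word
  return (uint64_t(Hi) << 32) | Lo;
}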
@@ -4448,24 +4455,27 @@ static bool isFunctionGlobalAddress(SDValue Callee);
static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
const TargetMachine &TM) {
- // If !G, Callee can be an external symbol.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G)
- return false;
-
+ // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+ // don't have enough information to determine if the caller and callee share
+ // the same TOC base, so we have to pessimistically assume they don't for
+ // correctness.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G)
+ return false;
+
+ const GlobalValue *GV = G->getGlobal();
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
// single TOC. Since each module will be addressed with a single TOC then we
// only need to check that caller and callee don't cross dso boundaries.
if (CodeModel::Medium == TM.getCodeModel() ||
CodeModel::Large == TM.getCodeModel())
- return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
+ return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
// Otherwise we need to ensure callee and caller are in the same section,
// since the linker may allocate multiple TOCs, and we don't know which
// sections will belong to the same TOC base.
- const GlobalValue *GV = G->getGlobal();
if (!GV->isStrongDefinitionForLinker())
return false;
@@ -4917,6 +4927,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isAIXABI = Subtarget.isAIXABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -4943,17 +4954,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
if (isFunctionGlobalAddress(Callee)) {
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+
// A call to a TLS address is actually an indirect call to a
// thread-specific pointer.
unsigned OpFlags = 0;
if (UsePlt)
OpFlags = PPCII::MO_PLT;
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
Callee.getValueType(), 0, OpFlags);
needIndirectCall = false;
@@ -5095,17 +5107,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
- // into the call.
- // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
- if (isSVR4ABI && isPPC64) {
+ // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
+ // live into the call.
+ // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
+ if ((isSVR4ABI && isPPC64) || isAIXABI) {
setUsesTOCBasePtr(DAG);
- // We cannot add X2 as an operand here for PATCHPOINT, because there is no
- // way to mark dependencies as implicit here. We will add the X2 dependency
- // in EmitInstrWithCustomInserter.
- if (!isPatchPoint)
- Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+ // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+ // no way to mark dependencies as implicit here.
+ // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+ if (!isPatchPoint)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
+ : PPC::R2, PtrVT));
}
return CallOpc;
@@ -5129,10 +5142,27 @@ SDValue PPCTargetLowering::LowerCallResult(
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Val = DAG.getCopyFromReg(Chain, dl,
- VA.getLocReg(), VA.getLocVT(), InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ SDValue Val;
+
+ if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ if (!Subtarget.isLittleEndian())
+ std::swap (Lo, Hi);
+ Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -5206,18 +5236,24 @@ SDValue PPCTargetLowering::FinishCall(
}
// Add a NOP immediately after the branch instruction when using the 64-bit
- // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // SVR4 or the AIX ABI.
+ // At link time, if caller and callee are in different modules and
// thus have a different TOC, the call will be replaced with a call to a stub
// function which saves the current TOC, loads the TOC of the callee and
// branches to the callee. The NOP will be replaced with a load instruction
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
- // same TOC), the NOP will remain unchanged.
+ // same TOC), the NOP will remain unchanged, or become some other NOP.
MachineFunction &MF = DAG.getMachineFunction();
- if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
- !isPatchPoint) {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ if (!isTailCall && !isPatchPoint &&
+ ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
+ Subtarget.isAIXABI())) {
if (CallOpc == PPCISD::BCTRL) {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("Indirect call on AIX is not implemented.");
+
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -5229,7 +5265,6 @@ SDValue PPCTargetLowering::FinishCall(
// allocated and an unnecessary move instruction being generated.
CallOpc = PPCISD::BCTRL_LOAD_TOC;
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
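For background on the scheme the comment above describes (standard 64-bit ELF linker behavior rather than something this patch introduces): a cross-TOC call site is emitted as a branch-and-link followed by a nop; when the linker routes the call through a TOC-switching stub, it rewrites that nop into a TOC restore load from the save slot whose offset getTOCSaveOffset() returns (e.g. ld 2, 24(1) under ELFv2), so R2 is reloaded from the caller's frame after the call returns.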
@@ -5245,6 +5280,19 @@ SDValue PPCTargetLowering::FinishCall(
}
}
+ if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
+ // On AIX, direct function calls reference the symbol for the function's
+ // entry point, which is named by inserting a "." before the function's
+ // C-linkage name.
+ GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+ MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
+ Twine(G->getGlobal()->getName()));
+ Callee = DAG.getMCSymbol(S, PtrVT);
+ // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
+ Ops[1] = Callee;
+ }
+
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
@@ -5314,16 +5362,20 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
!isTailCall)
Callee = LowerGlobalAddress(Callee, DAG);
- if (Subtarget.isSVR4ABI()) {
- if (Subtarget.isPPC64())
- return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
- else
- return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
- }
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, isPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+
+ if (Subtarget.isSVR4ABI())
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, isPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+
+ if (Subtarget.isAIXABI())
+ return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
+ isTailCall, isPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
@@ -5444,12 +5496,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, j = 0, e = ArgLocs.size();
+ // i - Tracks the index into the list of registers allocated for the call
+ // RealArgIdx - Tracks the index into the list of actual function arguments
+ // j - Tracks the index into the list of byval arguments
+ for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
i != e;
- ++i) {
+ ++i, ++RealArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[RealArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
if (Flags.isByVal()) {
// Argument is an aggregate which is passed by value, thus we need to
@@ -5498,7 +5553,17 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
+ bool IsLE = Subtarget.isLittleEndian();
+ SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
+ SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
+ RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
+ SVal.getValue(0)));
+ } else
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
// Put argument in the parameter list area of the current stack frame.
assert(VA.isMemLoc());
@@ -6613,6 +6678,128 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
NumBytes, Ins, InVals, CS);
}
+
+SDValue PPCTargetLowering::LowerCall_AIX(
+ SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool isPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite CS) const {
+
+ assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
+ "Unimplemented calling convention!");
+ if (isVarArg || isPatchPoint)
+ report_fatal_error("This call type is unimplemented on AIX.");
+
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+ unsigned NumOps = Outs.size();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area and the parameter list area.
+ // On XCOFF, we start with 24/48, which is reserved space for
+ // [SP][CR][LR][2 x reserved][TOC].
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if the callee
+ // is variadic.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog
+ // inserter pass.
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+ SDValue CallSeqStart = Chain;
+
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10
+ };
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10
+ };
+
+ const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
+ : array_lengthof(GPR_32);
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+ unsigned GPR_idx = 0;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ if (isTailCall)
+ report_fatal_error("Handling of tail call is unimplemented!");
+ int SPDiff = 0;
+
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote integers if needed.
+ if (Arg.getValueType() == MVT::i1 ||
+ (isPPC64 && Arg.getValueType() == MVT::i32)) {
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+ }
+
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal())
+ report_fatal_error("Passing structure by value is unimplemented!");
+
+ switch (Arg.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs)
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ case MVT::f128:
+ case MVT::v4f64:
+ case MVT::v4i1:
+ report_fatal_error("Handling of this parameter type is unimplemented!");
+ }
+ }
+
+ if (!isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee))
+ report_fatal_error("Handling of indirect call is unimplemented!");
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (auto Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
+}
+
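Plugging in the constants from the comments above, the conservative outgoing area this lowering reserves is NumBytes = LinkageSize + 8 * PtrByteSize: 24 + 8*4 = 56 bytes on 32-bit XCOFF and 48 + 8*8 = 112 bytes on 64-bit, i.e. the reserved [SP][CR][LR][2 x reserved][TOC] words plus room for the callee to home all eight GPR arguments.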
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
@@ -6644,11 +6831,11 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Arg = OutVals[i];
+ SDValue Arg = OutVals[RealResIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -6663,8 +6850,21 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
-
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ // Legalize ret f64 -> ret 2 x i32.
+ SDValue SVal =
+ DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -6890,6 +7090,61 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(0));
}
+SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // Implements a vector truncate that fits in a vector register as a shuffle.
+ // We want to legalize vector truncates down to where the source fits in
+ // a vector register (and target is therefore smaller than vector register
+ // size). At that point legalization will try to custom lower the sub-legal
+ // result and get here - where we can contain the truncate as a single target
+ // operation.
+
+ // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
+ // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
+ //
+ // We will implement it for big-endian ordering as this (where x denotes
+ // undefined):
+ // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
+ // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
+ //
+ // The same operation in little-endian ordering will be:
+ // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
+ // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
+
+ assert(Op.getValueType().isVector() && "Vector type expected.");
+
+ SDLoc DL(Op);
+ SDValue N1 = Op.getOperand(0);
+ unsigned SrcSize = N1.getValueType().getSizeInBits();
+ assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
+ SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+
+ EVT TrgVT = Op.getValueType();
+ unsigned TrgNumElts = TrgVT.getVectorNumElements();
+ EVT EltVT = TrgVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+ // First list the elements we want to keep.
+ unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
+ SmallVector<int, 16> ShuffV;
+ if (Subtarget.isLittleEndian())
+ for (unsigned i = 0; i < TrgNumElts; ++i)
+ ShuffV.push_back(i * SizeMult);
+ else
+ for (unsigned i = 1; i <= TrgNumElts; ++i)
+ ShuffV.push_back(i * SizeMult - 1);
+
+ // Populate the remaining elements with undefs.
+ for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
+ // ShuffV.push_back(i + WideNumElts);
+ ShuffV.push_back(WideNumElts + 1);
+
+ SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
+ return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+}
+
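A worked instance of the mask construction above, for a v8i16 to v8i8 truncate: SrcSize is 128 so no widening is needed, TrgVT is 64 bits wide, SizeMult = 128 / 64 = 2, EltVT is i8, and WideNumElts = 16. Little-endian keeps the low (even) byte of each halfword, ShuffV = [0,2,4,6,8,10,12,14]; big-endian keeps the odd bytes, ShuffV = [1,3,5,7,9,11,13,15]. The remaining eight lanes are padded with an index past the real elements, so the shuffle produces undef there.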
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -9604,10 +9859,63 @@ SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
BifID = Intrinsic::ppc_altivec_vmaxsh;
else if (VT == MVT::v16i8)
BifID = Intrinsic::ppc_altivec_vmaxsb;
-
+
return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}
+// Custom lowering for fpext v2f32 to v2f64
+SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+
+ assert(Op.getOpcode() == ISD::FP_EXTEND &&
+ "Should only be called for ISD::FP_EXTEND");
+
+ // We only want to custom lower an extend from v2f32 to v2f64.
+ if (Op.getValueType() != MVT::v2f64 ||
+ Op.getOperand(0).getValueType() != MVT::v2f32)
+ return SDValue();
+
+ SDLoc dl(Op);
+ SDValue Op0 = Op.getOperand(0);
+
+ switch (Op0.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FSUB: {
+ SDValue NewLoad[2];
+ for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
+ // Ensure both inputs are loads.
+ SDValue LdOp = Op0.getOperand(i);
+ if (LdOp.getOpcode() != ISD::LOAD)
+ return SDValue();
+ // Generate new load node.
+ LoadSDNode *LD = cast<LoadSDNode>(LdOp);
+ SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+ NewLoad[i] =
+ DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+ DAG.getVTList(MVT::v4f32, MVT::Other),
+ LoadOps, LD->getMemoryVT(),
+ LD->getMemOperand());
+ }
+ SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
+ NewLoad[0], NewLoad[1],
+ Op0.getNode()->getFlags());
+ return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op0);
+ SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue NewLd =
+ DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+ DAG.getVTList(MVT::v4f32, MVT::Other),
+ LoadOps, LD->getMemoryVT(), LD->getMemOperand());
+ return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
+ }
+ }
+ llvm_unreachable("ERROR:Should return for all cases within swtich.");
+}
+
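The two custom paths above can be read as a single rewrite. For a plain load, fpext (load v2f32) becomes FP_EXTEND_LH (LD_VSX_LH ptr); for arithmetic over two loaded values, fpext (fadd/fmul/fsub (load A), (load B)) re-issues both loads as LD_VSX_LH nodes, performs the arithmetic in v4f32, and extends once: FP_EXTEND_LH (op (LD_VSX_LH A), (LD_VSX_LH B)). The node names come from the patch; the reading that LD_VSX_LH loads the two floats into one half of a VSX register, which FP_EXTEND_LH then widens in place, is inferred from their use here.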
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9661,6 +9969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -9701,7 +10010,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::INTRINSIC_W_CHAIN: {
if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
- Intrinsic::ppc_is_decremented_ctr_nonzero)
+ Intrinsic::loop_decrement)
break;
assert(N->getValueType(0) == MVT::i1 &&
@@ -9737,6 +10046,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::TRUNCATE: {
+ EVT TrgVT = N->getValueType(0);
+ if (TrgVT.isVector() &&
+ isOperationCustom(N->getOpcode(), TrgVT) &&
+ N->getOperand(0).getValueType().getSizeInBits() <= 128)
+ Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+ return;
+ }
case ISD::BITCAST:
// Don't handle bitcast here.
return;
@@ -9822,10 +10139,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned incr = MI.getOperand(3).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register incr = MI.getOperand(3).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -9841,7 +10158,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned TmpReg = (!BinOpcode) ? incr :
+ Register TmpReg = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
@@ -9949,20 +10266,20 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned ShiftReg =
+ Register PtrReg = RegInfo.createVirtualRegister(RC);
+ Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+ Register ShiftReg =
isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
- unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Ptr1Reg;
- unsigned TmpReg =
+ Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+ Register Ptr1Reg;
+ Register TmpReg =
(!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
@@ -10764,23 +11081,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned ShiftReg =
+ Register PtrReg = RegInfo.createVirtualRegister(RC);
+ Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+ Register ShiftReg =
isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
- unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Ptr1Reg;
- unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
- unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+ Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+ Register Ptr1Reg;
+ Register TmpReg = RegInfo.createVirtualRegister(GPRC);
+ Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -10968,7 +11285,147 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
- return BB;
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(CRReg);
+ } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
+ DebugLoc Dl = MI.getDebugLoc();
+ unsigned Imm = MI.getOperand(1).getImm();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(PPC::CR0EQ);
+ } else if (MI.getOpcode() == PPC::SETRNDi) {
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+ // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
+ // the following settings:
+ // 00 Round to nearest
+ // 01 Round to 0
+ // 10 Round to +inf
+ // 11 Round to -inf
+
+ // When the operand is an immediate, use the two least significant bits of
+ // the immediate to set bits 62:63 of FPSCR.
+ unsigned Mode = MI.getOperand(1).getImm();
+ BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
+ .addImm(31);
+
+ BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
+ .addImm(30);
+ } else if (MI.getOpcode() == PPC::SETRND) {
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
+ // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
+ // If the target doesn't have DirectMove, we should use the stack to do the
+ // conversion, because the target doesn't have instructions like mtvsrd
+ // or mfvsrd to do this conversion directly.
+ auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
+ if (Subtarget.hasDirectMove()) {
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(SrcReg);
+ } else {
+ // Use stack to do the register copy.
+ unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
+ if (RC == &PPC::F8RCRegClass) {
+ // Copy register from F8RCRegClass to G8RCRegclass.
+ assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
+ "Unsupported RegClass.");
+
+ StoreOp = PPC::STFD;
+ LoadOp = PPC::LD;
+ } else {
+ // Copy register from G8RCRegClass to F8RCRegclass.
+ assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
+ (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
+ "Unsupported RegClass.");
+ }
+
+ MachineFrameInfo &MFI = F->getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(8, 8, false);
+
+ MachineMemOperand *MMOStore = F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ // Store the SrcReg into the stack.
+ BuildMI(*BB, MI, dl, TII->get(StoreOp))
+ .addReg(SrcReg)
+ .addImm(0)
+ .addFrameIndex(FrameIdx)
+ .addMemOperand(MMOStore);
+
+ MachineMemOperand *MMOLoad = F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ // Load from the stack where SrcReg is stored, and save to DestReg,
+ // so we have done the RegClass conversion from RegClass::SrcReg to
+ // RegClass::DestReg.
+ BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
+ .addImm(0)
+ .addFrameIndex(FrameIdx)
+ .addMemOperand(MMOLoad);
+ }
+ };
+
+ unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+ // When the operand is a gprc register, use the two least significant bits
+ // of the register and the mtfsf instruction to set bits 62:63 of FPSCR.
+ //
+ // copy OldFPSCRTmpReg, OldFPSCRReg
+ // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
+ // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
+ // copy NewFPSCRReg, NewFPSCRTmpReg
+ // mtfsf 255, NewFPSCRReg
+ MachineOperand SrcOp = MI.getOperand(1);
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+ copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
+
+ unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+ // The first operand of INSERT_SUBREG should be a register that has
+ // subregisters; since we only care about its RegClass, we can use an
+ // IMPLICIT_DEF register.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
+ BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
+ .addReg(ImDefReg)
+ .add(SrcOp)
+ .addImm(1);
+
+ unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
+ .addReg(OldFPSCRTmpReg)
+ .addReg(ExtSrcReg)
+ .addImm(0)
+ .addImm(62);
+
+ unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
+
+ // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
+ // 32:63 of FPSCR.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
+ .addImm(255)
+ .addReg(NewFPSCRReg)
+ .addImm(0)
+ .addImm(0);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
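A small standalone sketch of the immediate-to-FPSCR mapping used by the SETRNDi path above. The bit numbering follows the mtfsb operand convention, where field 31 is the least significant rounding-mode bit; names and structure are illustrative:

#include <cstdio>

int main() {
  // FPSCR[62:63] rounding-mode encoding, as in the comment above.
  const char *Names[] = {"round to nearest", "round toward zero",
                         "round toward +infinity", "round toward -infinity"};
  for (unsigned Mode = 0; Mode < 4; ++Mode) {
    // Mirrors the lowering: (Mode & 1) drives field 31 (RN low bit),
    // (Mode & 2) drives field 30 (RN high bit).
    std::printf("mode %u (%s): mtfsb%c 31, mtfsb%c 30\n", Mode, Names[Mode],
                (Mode & 1) ? '1' : '0', (Mode & 2) ? '1' : '0');
  }
  return 0;
}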
@@ -11006,7 +11463,9 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- UseOneConstNR = true;
+ // The Newton-Raphson computation with a single constant does not provide
+ // enough accuracy on some CPUs.
+ UseOneConstNR = !Subtarget.needsTwoConstNR();
return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
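For context on the UseOneConstNR change, a minimal standalone sketch of one Newton-Raphson refinement step for 1/sqrt(x): accuracy roughly doubles per step, which is why a single step over a coarse hardware estimate may fall short on some CPUs. The starting estimate below is made up:

#include <cmath>
#include <cstdio>

// One Newton-Raphson step for y ~= 1/sqrt(x): y' = y * (1.5 - 0.5 * x * y * y).
float refineRsqrt(float X, float Est) {
  return Est * (1.5f - 0.5f * X * Est * Est);
}

int main() {
  const float X = 2.0f;
  float Est = 0.7f; // stand-in for a coarse hardware estimate of 1/sqrt(2)
  for (int Step = 1; Step <= 3; ++Step) {
    Est = refineRsqrt(X, Est);
    std::printf("step %d: %.9f (exact %.9f)\n", Step, Est, 1.0 / std::sqrt(2.0));
  }
  return 0;
}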
@@ -12062,9 +12521,14 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
"Should be called with a BUILD_VECTOR node");
SDLoc dl(N);
+
+ // Return early for non-byte-sized types, as they can't be consecutive.
+ if (!N->getValueType(0).getVectorElementType().isByteSized())
+ return SDValue();
+
bool InputsAreConsecutiveLoads = true;
bool InputsAreReverseConsecutive = true;
- unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+ unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
SDValue FirstInput = N->getOperand(0);
bool IsRoundOfExtLoad = false;
@@ -12332,9 +12796,8 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
if (!Ext1Op || !Ext2Op)
return SDValue();
- if (Ext1.getValueType() != MVT::i32 ||
- Ext2.getValueType() != MVT::i32)
- if (Ext1.getOperand(0) != Ext2.getOperand(0))
+ if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
+ Ext1.getOperand(0) != Ext2.getOperand(0))
return SDValue();
int FirstElem = Ext1Op->getZExtValue();
@@ -12664,6 +13127,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineSRA(N, DCI);
case ISD::SRL:
return combineSRL(N, DCI);
+ case ISD::MUL:
+ return combineMUL(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -13246,7 +13711,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
- Intrinsic::ppc_is_decremented_ctr_nonzero) {
+ Intrinsic::loop_decrement) {
// We now need to make the intrinsic dead (it cannot be instruction
// selected).
@@ -13272,14 +13737,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::AND &&
LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
- Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ Intrinsic::loop_decrement &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
!isNullConstant(LHS.getOperand(1)))
LHS = LHS.getOperand(0);
if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
- Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ Intrinsic::loop_decrement &&
isa<ConstantSDNode>(RHS)) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
"Counter decrement comparison is not EQ or NE");
@@ -13355,9 +13820,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
- case ISD::ABS:
+ case ISD::ABS:
return combineABS(N, DCI);
- case ISD::VSELECT:
+ case ISD::VSELECT:
return combineVSelect(N, DCI);
}
@@ -13453,6 +13918,15 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
if (!ML)
break;
+ if (!DisableInnermostLoopAlign32) {
+ // If the nested loop is an innermost loop, prefer a 32-byte alignment,
+ // so that we can decrease cache misses and branch-prediction misses.
+ // Actual alignment of the loop will depend on the hotness check and other
+ // logic in alignBlocks.
+ if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
+ return 5;
+ }
+
const PPCInstrInfo *TII = Subtarget.getInstrInfo();
// For small loops (between 5 and 8 instructions), align to a 32-byte
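Note that the returned value here is a log2 alignment under the TargetLowering convention of this LLVM version (an assumption worth checking against the tree); a one-line sketch:

#include <cassert>

// Assumption: preferred alignments are expressed as log2 values.
unsigned alignmentInBytes(unsigned Log2Align) { return 1u << Log2Align; }

int main() {
  assert(alignmentInBytes(5) == 32); // "return 5" above requests 32 bytes
  return 0;
}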
@@ -13502,7 +13976,7 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
Constraint == "wf" || Constraint == "ws" ||
- Constraint == "wi") {
+ Constraint == "wi" || Constraint == "ww") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
@@ -13530,10 +14004,12 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
StringRef(constraint) == "wf") &&
type->isVectorTy())
return CW_Register;
- else if (StringRef(constraint) == "ws" && type->isDoubleTy())
- return CW_Register;
else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
return CW_Register; // just hold 64-bit integers data.
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ww" && type->isFloatTy())
+ return CW_Register;
switch (*constraint) {
default:
@@ -13619,7 +14095,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Constraint == "wf" || Constraint == "wi") &&
Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
- } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+ } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
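A hedged example of what the new "ww" constraint enables in user code; it assumes a POWER8-or-later target with VSX, and xsaddsp is the VSX scalar single-precision add:

// Assumes a POWER8-or-later target with VSX; the %x modifier prints the
// full VSX register number for the operand.
float addSingle(float A, float B) {
  float Res;
  asm("xsaddsp %x0, %x1, %x2" : "=ww"(Res) : "ww"(A), "ww"(B));
  return Res;
}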
@@ -13865,7 +14341,7 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
if (CModel == CodeModel::Small || CModel == CodeModel::Large)
return true;
- // JumpTable and BlockAddress are accessed as got-indirect.
+ // JumpTable and BlockAddress are accessed as got-indirect.
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
return true;
@@ -14082,18 +14558,16 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
+EVT PPCTargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
- const Function &F = MF.getFunction();
// When expanding a memset, require at least two QPX instructions to cover
// the cost of loading the value to be stored from the constant pool.
if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
(!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
- !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
return MVT::v4f64;
}
@@ -14178,6 +14652,7 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
+ MachineMemOperand::Flags,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -14324,7 +14799,7 @@ void PPCTargetLowering::insertCopiesSplitCSR(
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
- // Insert the copy-back instructions right before the terminator
+ // Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
@@ -14345,7 +14820,8 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
return TargetLowering::insertSSPDeclarations(M);
}
-bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;
@@ -14585,6 +15061,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
return SDValue();
}
+SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
+ if (!ConstOpOrElement)
+ return SDValue();
+
+ // An imul is usually smaller than the alternative sequence for legal type.
+ if (DAG.getMachineFunction().getFunction().hasMinSize() &&
+ isOperationLegal(ISD::MUL, N->getValueType(0)))
+ return SDValue();
+
+ auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
+ switch (this->Subtarget.getDarwinDirective()) {
+ default:
+ // TODO: enhance the condition for subtargets before pwr8
+ return false;
+ case PPC::DIR_PWR8:
+ // type mul add shl
+ // scalar 4 1 1
+ // vector 7 2 2
+ return true;
+ case PPC::DIR_PWR9:
+ // type mul add shl
+ // scalar 5 2 2
+ // vector 7 2 2
+
+ // The cycle ratios of the relevant operations are shown in the table
+ // above. Since mul costs 5 (scalar) / 7 (vector) cycles while add, sub
+ // and shl all cost 2 for both scalar and vector types, two-instruction
+ // patterns (add/sub + shl, 4 cycles) are always profitable. But a
+ // three-instruction pattern such as
+ // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
+ // so we should only use it for vector types.
+ return IsAddOne && IsNeg ? VT.isVector() : true;
+ }
+ };
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
+ bool IsNeg = MulAmt.isNegative();
+ APInt MulAmtAbs = MulAmt.abs();
+
+ if ((MulAmtAbs - 1).isPowerOf2()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+
+ if (!IsProfitable(IsNeg, true, VT))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
+ SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+
+ if (!IsNeg)
+ return Res;
+
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
+ } else if ((MulAmtAbs + 1).isPowerOf2()) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+
+ if (!IsProfitable(IsNeg, false, VT))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
+
+ if (!IsNeg)
+ return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
+ else
+ return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
+
+ } else {
+ return SDValue();
+ }
+}
+
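The identities combineMUL relies on can be checked in plain C++; this standalone sketch (names are illustrative, not from the patch) verifies the four rewrites on ordinary two's-complement integers:

#include <cassert>
#include <cstdint>

// Illustrative helpers; N is the shift amount. Assumes the usual
// two's-complement shift behavior.
int64_t mulPow2Plus1(int64_t X, unsigned N) { return (X << N) + X; }  // x * (2^N + 1)
int64_t mulPow2Minus1(int64_t X, unsigned N) { return (X << N) - X; } // x * (2^N - 1)

int main() {
  for (int64_t X : {-7, 0, 3, 42}) {
    assert(mulPow2Plus1(X, 4) == X * 17);   // (mul x, 2^N + 1) => (add (shl x, N), x)
    assert(-mulPow2Plus1(X, 4) == X * -17); // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    assert(mulPow2Minus1(X, 4) == X * 15);  // (mul x, 2^N - 1) => (sub (shl x, N), x)
    assert(X - (X << 4) == X * -15);        // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
  }
  return 0;
}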
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 30acd60eba6f..97422c6eda36 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,9 +1,8 @@
//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
-#include "PPC.h"
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,7 +39,7 @@ namespace llvm {
// the enum. The order of elements in this enum matters!
// Values that are added after this entry:
// STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
- // are considerd memory opcodes and are treated differently than entries
+ // are considered memory opcodes and are treated differently than entries
// that come before it. For example, ADD or MUL should be placed before
// the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
// after it.
@@ -161,7 +159,7 @@ namespace llvm {
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
- /// SVR4 calls.
+ /// SVR4 calls and 32-bit/64-bit AIX calls.
CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -193,9 +191,18 @@ namespace llvm {
/// Direct move from a GPR to a VSX register (zero)
MTVSRZ,
- /// Direct move of 2 consective GPR to a VSX register.
+ /// Direct move of 2 consecutive GPRs to a VSX register.
BUILD_FP128,
+ /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+ /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+ /// unsupported for this target.
+ /// Merge 2 GPRs to a single SPE register.
+ BUILD_SPE64,
+
+ /// Extract SPE register component, second argument is high or low.
+ EXTRACT_SPE,
+
/// Extract a subvector from signed integer vector and convert to FP.
/// It is primarily used to convert a (widened) illegal integer vector
/// type to a legal floating point vector type.
@@ -265,11 +272,11 @@ namespace llvm {
CR6UNSET,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
- /// on PPC32.
+ /// for non-position independent code on PPC32.
PPC32_GOT,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS on PPC32.
+ /// local dynamic TLS and position-independent code on PPC32.
PPC32_PICGOT,
/// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
@@ -405,6 +412,9 @@ namespace llvm {
/// representation.
QBFLT,
+ /// Custom extend v4f32 to v2f64.
+ FP_EXTEND_LH,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -446,6 +456,10 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+ /// v2f32 value into the lower half of a VSR register.
+ LD_VSX_LH,
+
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
@@ -620,6 +634,8 @@ namespace llvm {
return true;
}
+ bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
@@ -653,18 +669,27 @@ namespace llvm {
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+ /// SelectAddressEVXRegReg - Given the specified address, check to see if
+ /// it can be more efficiently represented as [r+imm].
+ bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
/// SelectAddressRegReg - Given the specified address, check to see if it
- /// can be represented as an indexed [r+r] operation. Returns false if it
- /// can be more efficiently represented with [r+imm].
+ /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
+ /// is non-zero, only accept displacements that are not suitable for [r+imm].
+ /// Returns false if it can be represented by [r+imm], which is preferred.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG,
+ unsigned EncodingAlignment = 0) const;
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
- /// is not better represented as reg+reg. If Aligned is true, only accept
- /// displacements suitable for STD and friends, i.e. multiples of 4.
+ /// is not better represented as reg+reg. If \p EncodingAlignment is
+ /// non-zero, only accept displacements that satisfy the instruction
+ /// encoding requirement, i.e. multiples of 4 for the DS form.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
- SelectionDAG &DAG, unsigned Alignment) const;
+ SelectionDAG &DAG,
+ unsigned EncodingAlignment) const;
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
@@ -833,14 +858,14 @@ namespace llvm {
EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- bool allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- unsigned Align = 1,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -888,7 +913,8 @@ namespace llvm {
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
unsigned getJumpTableEncoding() const override;
bool isJumpTableRelative() const override;
@@ -898,14 +924,6 @@ namespace llvm {
unsigned JTI,
MCContext &Ctx) const override;
- unsigned getNumRegistersForCallingConv(LLVMContext &Context,
- CallingConv:: ID CC,
- EVT VT) const override;
-
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
- CallingConv:: ID CC,
- EVT VT) const override;
-
private:
struct ReuseLoadInfo {
SDValue Ptr;
@@ -953,6 +971,8 @@ namespace llvm {
SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+ SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -1019,6 +1039,7 @@ namespace llvm {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1106,6 +1127,15 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const;
+ SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool isPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite CS) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -1119,6 +1149,7 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -1137,8 +1168,6 @@ namespace llvm {
int &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
- CCAssignFn *useFastISelCCs(unsigned Flag) const;
-
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
@@ -1169,30 +1198,6 @@ namespace llvm {
} // end namespace PPC
- bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
- bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
- bool
- CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
- bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 2ce6ad3293eb..d598567f8e4e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1,9 +1,8 @@
//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -168,7 +167,7 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
(ins memrix:$src),
"bctrl\n\tld 2, $src", IIC_BrB,
- [(PPCbctrl_load_toc ixaddr:$src)]>,
+ [(PPCbctrl_load_toc iaddrX4:$src)]>,
Requires<[In64BitMode]>;
}
@@ -193,6 +192,12 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+// Calls for AIX
+def : Pat<(PPCcall (i64 mcsym:$dst)),
+ (BL8 mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
+ (BL8_NOP mcsym:$dst)>;
+
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
@@ -383,7 +388,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, Defs = [CTR8] in {
-let Pattern = [(int_ppc_mtctr i64:$rS)] in
+let Pattern = [(int_set_loop_iterations i64:$rS)] in
def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
"mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -720,10 +725,17 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
- "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
- isPPC64, Requires<[IsISA3_0]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH),
+ "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+ [(set i64:$rA,
+ (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+ isPPC64, Requires<[IsISA3_0]>;
+
+defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+ "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+ []>, isPPC64, Requires<[IsISA3_0]>;
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,13 +785,21 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
let Predicates = [IsISA3_0] in {
def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+def MADDHDU : VAForm_1a<49,
+ (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
- "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
+def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
+ "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+ [(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>,
+ isPPC64;
def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def MADDLD8 : VAForm_1a<51,
+ (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+ "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+ [(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>,
+ isPPC64;
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
}
@@ -911,7 +931,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
- (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+ (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -920,7 +940,7 @@ def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
PPC970_DGroup_Cracked;
def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", IIC_LdStLHA,
- [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1 in {
@@ -1022,7 +1042,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1045,7 +1065,7 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64;
def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
@@ -1214,10 +1234,10 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
"std $rS, $dst", IIC_LdStSTD,
- [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
- [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ [(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
"stdbrx $rS, $dst", IIC_LdStStore,
@@ -1433,11 +1453,11 @@ def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
// 64-bits atomic loads and stores
-def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
-def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
+def : Pat<(atomic_load_64 iaddrX4:$src), (LD memrix:$src)>;
+def : Pat<(atomic_load_64 xaddrX4:$src), (LDX memrr:$src)>;
-def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
-def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
let Predicates = [IsISA3_0] in {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 69b19e45c3e9..8176c5120a83 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1,9 +1,8 @@
//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -822,7 +821,9 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
[(set v16i8:$vD, (v16i8 immAllZerosV))]>;
@@ -899,6 +900,32 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
+// Max/Min
+def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMAXUB $src1, $src2))>;
+def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMAXSB $src1, $src2))>;
+def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMAXUH $src1, $src2))>;
+def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMAXSH $src1, $src2))>;
+def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMAXUW $src1, $src2))>;
+def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMAXSW $src1, $src2))>;
+def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMINUB $src1, $src2))>;
+def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMINSB $src1, $src2))>;
+def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMINUH $src1, $src2))>;
+def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMINSH $src1, $src2))>;
+def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMINUW $src1, $src2))>;
+def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMINSW $src1, $src2))>;
+
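With these patterns in place, generic smax/umax/smin/umin nodes select the AltiVec instructions directly; a small hedged example (requires -maltivec) showing the v4i32 case:

#include <altivec.h>

// vec_max on v4i32 maps to vmaxsw, the same instruction the new smax
// pattern above now selects for generic IR.
__vector signed int maxEach(__vector signed int A, __vector signed int B) {
  return vec_max(A, B);
}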
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
index cf71b1c59869..323f7e39adf7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -1,9 +1,8 @@
//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 2fe765dd99e1..a48eb1690695 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1,9 +1,8 @@
//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,14 +37,6 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
let TSFlags{2} = PPC970_Cracked;
let TSFlags{5-3} = PPC970_Unit;
- /// Indicate that the VSX instruction is to use VSX numbering/encoding.
- /// Since ISA 3.0, there are scalar instructions that use the upper
- /// half of the VSX register set only. Rather than adding further complexity
- /// to the register class set, the VSX registers just include the Altivec
- /// registers and this flag decides the numbering to be used for them.
- bits<1> UseVSXReg = 0;
- let TSFlags{6} = UseVSXReg;
-
// Indicate that this instruction is of type X-Form Load or Store
bits<1> XFormMemOp = 0;
let TSFlags{7} = XFormMemOp;
@@ -74,7 +65,6 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
-class UseVSXReg { bits<1> UseVSXReg = 1; }
class XFormMemOp { bits<1> XFormMemOp = 1; }
// Two joined instructions; used to emit two adjacent instructions as one.
@@ -730,6 +720,7 @@ class XForm_25_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
: XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
}
+// [PO RT /// RB XO RC]
class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
@@ -1193,9 +1184,9 @@ class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2,
let Inst{11-15} = DCMX{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21-24} = xo1;
- let Inst{25} = DCMX{5};
+ let Inst{25} = DCMX{6};
let Inst{26-28} = xo2;
- let Inst{29} = DCMX{6};
+ let Inst{29} = DCMX{5};
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 0efe797c765d..104b57a70a2e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -1,9 +1,8 @@
//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,55 +20,53 @@ def HTM_get_imm : SDNodeXForm<imm, [{
}]>;
let hasSideEffects = 1 in {
-def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+def TCHECK_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins), "#TCHECK_RET", []>;
+def TBEGIN_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins u1imm:$R), "#TBEGIN_RET", []>;
}
let Predicates = [HasHTM] in {
+let Defs = [CR0] in {
def TBEGIN : XForm_htm0 <31, 654,
- (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
def TEND : XForm_htm1 <31, 686,
- (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
def TABORT : XForm_base_r3xo <31, 910,
- (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
[]>, isDOT {
let RST = 0;
let B = 0;
}
def TABORTWC : XForm_base_r3xo <31, 782,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TABORTWCI : XForm_base_r3xo <31, 846,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TABORTDC : XForm_base_r3xo <31, 814,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TABORTDCI : XForm_base_r3xo <31, 878,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TSR : XForm_htm2 <31, 750,
- (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
isDOT;
-def TCHECK : XForm_htm3 <31, 718,
- (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
-
-
def TRECLAIM : XForm_base_r3xo <31, 942,
- (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ (outs), (ins gprc:$A), "treclaim. $A",
IIC_SprMTSPR, []>,
isDOT {
let RST = 0;
@@ -77,13 +74,17 @@ def TRECLAIM : XForm_base_r3xo <31, 942,
}
def TRECHKPT : XForm_base_r3xo <31, 1006,
- (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
isDOT {
let RST = 0;
let A = 0;
let B = 0;
}
+}
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>;
// Builtins
// All HTM instructions, with the exception of tcheck, set CR0 with the
@@ -94,15 +95,11 @@ def TRECHKPT : XForm_base_r3xo <31, 1006,
// tbegin builtin API which defines a return value of 1 as success.
def : Pat<(int_ppc_tbegin i32:$R),
- (XORI
- (EXTRACT_SUBREG (
- TBEGIN (HTM_get_imm imm:$R)), sub_eq),
- 1)>;
+ (XORI (TBEGIN_RET(HTM_get_imm imm:$R)), 1)>;
def : Pat<(int_ppc_tend i32:$R),
(TEND (HTM_get_imm imm:$R))>;
-
def : Pat<(int_ppc_tabort i32:$R),
(TABORT $R)>;
@@ -167,6 +164,8 @@ def : Pat<(int_ppc_tsuspend),
(TSR 0)>;
def : Pat<(i64 (int_ppc_ttest)),
- (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+ (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (TABORTWCI 0, (LI 0), 0), sub_32)),
+ 36, 28)>;
} // [HasHTM]
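A hedged usage sketch of the tbegin builtin contract the new TBEGIN_RET pseudo implements (assumes GCC/Clang HTM builtins and compilation with -mhtm):

#include <htmintrin.h> // assumption: HTM builtins are available

// __builtin_tbegin returns 1 when the transaction starts successfully,
// which is why the selected code XORs the CR0.EQ-derived bit with 1.
int incrementTransactionally(int *Counter) {
  if (__builtin_tbegin(0)) {
    ++*Counter;        // transactional body
    __builtin_tend(0);
    return 1;          // committed
  }
  return 0;            // failed to start or was aborted
}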
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d754ce2990d2..a787bdd56b9d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -333,6 +332,17 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::ADDIStocHA:
case PPC::ADDItocL:
case PPC::LOAD_STACK_GUARD:
+ case PPC::XXLXORz:
+ case PPC::XXLXORspz:
+ case PPC::XXLXORdpz:
+ case PPC::V_SET0B:
+ case PPC::V_SET0H:
+ case PPC::V_SET0:
+ case PPC::V_SETALLONESB:
+ case PPC::V_SETALLONESH:
+ case PPC::V_SETALLONES:
+ case PPC::CRSET:
+ case PPC::CRUNSET:
return true;
}
return false;
@@ -381,9 +391,9 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Swap op1/op2
assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
"Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
- unsigned Reg0 = MI.getOperand(0).getReg();
- unsigned Reg1 = MI.getOperand(1).getReg();
- unsigned Reg2 = MI.getOperand(2).getReg();
+ Register Reg0 = MI.getOperand(0).getReg();
+ Register Reg1 = MI.getOperand(1).getReg();
+ Register Reg2 = MI.getOperand(2).getReg();
unsigned SubReg1 = MI.getOperand(1).getSubReg();
unsigned SubReg2 = MI.getOperand(2).getSubReg();
bool Reg1IsKill = MI.getOperand(1).isKill();
@@ -411,7 +421,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (NewMI) {
// Create a new instruction.
- unsigned Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
+ Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
bool Reg0IsDead = MI.getOperand(0).isDead();
return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
.addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
@@ -942,12 +952,16 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
} else if (PPC::G8RCRegClass.contains(SrcReg) &&
PPC::VSFRCRegClass.contains(DestReg)) {
+ assert(Subtarget.hasDirectMove() &&
+ "Subtarget doesn't support directmove, don't know how to copy.");
BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
NumGPRtoVSRSpill++;
getKillRegState(KillSrc);
return;
} else if (PPC::VSFRCRegClass.contains(SrcReg) &&
PPC::G8RCRegClass.contains(DestReg)) {
+ assert(Subtarget.hasDirectMove() &&
+ "Subtarget doesn't support directmove, don't know how to copy.");
BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
return;
@@ -963,7 +977,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
-
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -996,6 +1009,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
+ else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
Opc = PPC::EVOR;
else
@@ -1066,6 +1081,10 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1152,6 +1171,10 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1632,6 +1655,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
return false;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
// The record forms set the condition register based on a signed comparison
// with zero (so says the ISA manual). This is not as straightforward as it
// seems, however, because this is always a 64-bit comparison on PPC64, even
@@ -1645,6 +1669,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+ // Look through copies unless that gets us to a physical register.
+ unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+ if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+ SrcReg = ActualSrc;
+
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
@@ -1745,7 +1774,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
return false;
PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
- PPC::Predicate NewPred = Pred;
unsigned PredCond = PPC::getPredicateCondition(Pred);
unsigned PredHint = PPC::getPredicateHint(Pred);
int16_t Immed = (int16_t)Value;
@@ -1755,25 +1783,23 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (Immed == -1 && PredCond == PPC::PRED_GT)
// We convert "greater than -1" into "greater than or equal to 0",
// since we are assuming signed comparison by !equalityOnly
- NewPred = PPC::getPredicate(PPC::PRED_GE, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
else if (Immed == -1 && PredCond == PPC::PRED_LE)
// We convert "less than or equal to -1" into "less than 0".
- NewPred = PPC::getPredicate(PPC::PRED_LT, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
else if (Immed == 1 && PredCond == PPC::PRED_LT)
// We convert "less than 1" into "less than or equal to 0".
- NewPred = PPC::getPredicate(PPC::PRED_LE, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
else if (Immed == 1 && PredCond == PPC::PRED_GE)
// We convert "greater than or equal to 1" into "greater than 0".
- NewPred = PPC::getPredicate(PPC::PRED_GT, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
else
return false;
- PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
- NewPred));
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred));
}
// Search for Sub.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
// Get ready to iterate backward from CmpInstr.
@@ -1992,7 +2018,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::INLINEASM) {
+ if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -2358,13 +2384,6 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
It++;
unsigned Reg = MI.getOperand(i).getReg();
- // MachineInstr::readsRegister only returns true if the machine
- // instruction reads the exact register or its super-register. It
- // does not consider uses of sub-registers which seems like strange
- // behaviour. Nonetheless, if we end up with a 64-bit register here,
- // get the corresponding 32-bit register to check.
- if (PPC::G8RCRegClass.contains(Reg))
- Reg = Reg - PPC::X0 + PPC::R0;
// Is this register defined by some form of add-immediate (including
// load-immediate) within this basic block?
@@ -2381,7 +2400,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
return &*It;
}
break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
+ } else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
@@ -2424,6 +2443,83 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
}
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+ unsigned RegNo) const {
+ const MachineRegisterInfo &MRI =
+ StartMI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return;
+
+ // Instructions between [StartMI, EndMI] should be in same basic block.
+ assert((StartMI.getParent() == EndMI.getParent()) &&
+ "Instructions are not in same basic block");
+
+ bool IsKillSet = false;
+
+ auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
+ MachineOperand &MO = MI.getOperand(Index);
+ if (MO.isReg() && MO.isUse() && MO.isKill() &&
+ getRegisterInfo().regsOverlap(MO.getReg(), RegNo))
+ MO.setIsKill(false);
+ };
+
+ // Set killed flag for EndMI.
+ // No need to do anything if EndMI defines RegNo.
+ int UseIndex =
+ EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+ if (UseIndex != -1) {
+ EndMI.getOperand(UseIndex).setIsKill(true);
+ IsKillSet = true;
+ // Clear killed flag for other EndMI operands related to RegNo. In some
+ // unexpected cases, killed may be set multiple times for the same register
+ // operand in the same MI.
+ for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+ if (i != UseIndex)
+ clearOperandKillInfo(EndMI, i);
+ }
+
+ // Walk the instructions in reverse order (EndMI -> StartMI].
+ MachineBasicBlock::reverse_iterator It = EndMI;
+ MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+ // EndMI has been handled above, skip it here.
+ It++;
+ MachineOperand *MO = nullptr;
+ for (; It != E; ++It) {
+ // Skip instructions which cannot be a def/use of RegNo.
+ if (It->isDebugInstr() || It->isPosition())
+ continue;
+
+ // Clear the killed flag for all It operands related to RegNo. In some
+ // unexpected cases, killed may be set multiple times for the same register
+ // operand in the same MI.
+ for (int i = 0, e = It->getNumOperands(); i != e; ++i)
+ clearOperandKillInfo(*It, i);
+
+ // If the killed flag is not set yet, set it on the last use, or set the
+ // dead flag on the def if no use is found.
+ if (!IsKillSet) {
+ if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
+ // Use found, set it killed.
+ IsKillSet = true;
+ MO->setIsKill(true);
+ continue;
+ } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
+ &getRegisterInfo()))) {
+ // No use found, set dead for its def.
+ assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+ MO->setIsDead(true);
+ break;
+ }
+ }
+
+ if ((&*It) == &StartMI)
+ break;
+ }
+ // Ensure RegNo liveness is killed after EndMI.
+ assert((IsKillSet || (MO && MO->isDead())) &&
+ "RegNo should be killed or dead");
+}
+
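The reverse walk above is self-contained enough to model on plain data. Below is a minimal standalone sketch of the same policy, using toy Inst records in place of MachineInstr (all names here are hypothetical, not part of the patch): clear possibly-stale kill flags, then mark the last use killed, or the def dead when no use exists.

#include <cassert>
#include <cstddef>
#include <vector>

// Toy stand-in for one MachineInstr's relation to a single fixed register.
struct Inst {
  bool DefsReg = false;
  bool UsesReg = false;
  bool Kill = false; // kill flag on the use
  bool Dead = false; // dead flag on the def
};

// Same policy as fixupIsDeadOrKill over indices [Start, End]: scan backward
// from End, drop stale kill flags, then set the kill flag on the last use,
// or the dead flag on the def if no use was found.
void fixupToy(std::vector<Inst> &MIs, size_t Start, size_t End) {
  bool KillSet = false, DeadSet = false;
  for (size_t i = End + 1; i-- > Start;) {
    Inst &I = MIs[i];
    if (i != End)
      I.Kill = false; // conservatively clear; re-set below if this is the last use
    if (!KillSet && I.UsesReg) {
      I.Kill = true; // the register dies at its last use
      KillSet = true;
    } else if (!KillSet && I.DefsReg) {
      I.Dead = true; // a def with no use inside the range is dead
      DeadSet = true;
      break;
    }
  }
  assert((KillSet || DeadSet) && "Reg should be killed or dead");
}

The real routine additionally tolerates duplicate kill flags within one instruction's operand list; the toy collapses that to a single flag per instruction.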
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
@@ -2440,8 +2536,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
assert(ForwardingOperand < MI.getNumOperands() &&
"The forwarding operand needs to be valid at this point");
- bool KillFwdDefMI = !SeenIntermediateUse &&
- MI.getOperand(ForwardingOperand).isKill();
+ bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
+ bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
+ unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
@@ -2450,8 +2547,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by an add-immediate,
// try to convert it.
- if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
- *DefMI, KillFwdDefMI))
+ if (HasImmForm &&
+ transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
+ KillFwdDefMI))
return true;
if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
@@ -2466,7 +2564,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by LI, convert it now.
if (HasImmForm)
- return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
+ return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm);
bool ReplaceWithLI = false;
bool Is64BitLI = false;
@@ -2486,6 +2584,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
case PPC::CMPLDI: {
// Doing this post-RA would require dataflow analysis to reliably find uses
// of the CR register set by the compare.
+ // No need to fix up the killed/dead flags since this transformation is only
+ // valid before RA.
if (PostRA)
return false;
// If a compare-immediate is fed by an immediate and is itself an input of
@@ -2662,6 +2762,14 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
if (KilledDef && SetCR)
*KilledDef = nullptr;
replaceInstrWithLI(MI, LII);
+
+ // Fix up the killed/dead flags after the transformation.
+ // Pattern:
+ // ForwardingOperandReg = LI imm1
+ // y = op2 imm2, ForwardingOperandReg(killed)
+ if (IsForwardingOperandKilled)
+ fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg);
+
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
return true;
@@ -2669,10 +2777,6 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
-static bool isVFReg(unsigned Reg) {
- return PPC::VFRCRegClass.contains(Reg);
-}
-
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
ImmInstrInfo &III, bool PostRA) const {
unsigned Opc = MI.getOpcode();
@@ -3007,7 +3111,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSSPX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::LXSSP;
else {
III.ImmOpcode = PPC::LFS;
@@ -3021,7 +3125,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSDX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::LXSD;
else {
III.ImmOpcode = PPC::LFD;
@@ -3039,7 +3143,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSSPX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::STXSSP;
else {
III.ImmOpcode = PPC::STFS;
@@ -3053,7 +3157,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSDX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::STXSD;
else {
III.ImmOpcode = PPC::STFD;
@@ -3110,7 +3214,7 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
}
}
-// Check if the 'MI' that has the index OpNoForForwarding
+// Check if the 'MI' that has the index OpNoForForwarding
// meets the requirement described in the ImmInstrInfo.
bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
const ImmInstrInfo &III,
@@ -3156,7 +3260,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
MachineOperand *&RegMO) const {
unsigned Opc = DefMI.getOpcode();
if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
- return false;
+ return false;
assert(DefMI.getNumOperands() >= 3 &&
"Add inst must have at least three operands");
@@ -3169,11 +3273,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
return isAnImmediateOperand(*ImmMO);
}
-bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
- const MachineInstr &DefMI,
- const MachineInstr &MI,
- bool KillDefMI
- ) const {
+bool PPCInstrInfo::isRegElgibleForForwarding(
+ const MachineOperand &RegMO, const MachineInstr &DefMI,
+ const MachineInstr &MI, bool KillDefMI,
+ bool &IsFwdFeederRegKilled) const {
// x = addi y, imm
// ...
// z = lfdx 0, x -> z = lfd imm(y)
@@ -3184,14 +3287,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
if (MRI.isSSA())
return false;
- // MachineInstr::readsRegister only returns true if the machine
- // instruction reads the exact register or its super-register. It
- // does not consider uses of sub-registers which seems like strange
- // behaviour. Nonetheless, if we end up with a 64-bit register here,
- // get the corresponding 32-bit register to check.
unsigned Reg = RegMO.getReg();
- if (PPC::G8RCRegClass.contains(Reg))
- Reg = Reg - PPC::X0 + PPC::R0;
// Walk the instructions in reverse (MI --> DefMI) to get the last DEF of the Reg.
MachineBasicBlock::const_reverse_iterator It = MI;
@@ -3200,15 +3296,17 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
for (; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
return false;
+ else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ IsFwdFeederRegKilled = true;
// Made it to DefMI without encountering a clobber.
if ((&*It) == &DefMI)
break;
}
assert((&*It) == &DefMI && "DefMI is missing");
- // If DefMI also uses the register to be forwarded, we can only forward it
+ // If DefMI also defines the register to be forwarded, we can only forward it
// if DefMI is being erased.
- if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+ if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
return KillDefMI;
return true;
@@ -3271,11 +3369,9 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
// is the literal zero, attempt to forward the source of the add-immediate to
// the corresponding D-Form instruction with the displacement coming from
// the immediate being added.
-bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
- const ImmInstrInfo &III,
- unsigned OpNoForForwarding,
- MachineInstr &DefMI,
- bool KillDefMI) const {
+bool PPCInstrInfo::transformToImmFormFedByAdd(
+ MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
+ MachineInstr &DefMI, bool KillDefMI) const {
// RegMO ImmMO
// | |
// x = addi reg, imm <----- DefMI
@@ -3300,10 +3396,19 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
return false;
+ bool IsFwdFeederRegKilled = false;
// Check if the RegMO can be forwarded to MI.
- if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
+ IsFwdFeederRegKilled))
return false;
+ // Record killed info in case a fixup is needed after the transformation.
+ unsigned ForwardKilledOperandReg = ~0U;
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ bool PostRA = !MRI.isSSA();
+ if (PostRA && MI.getOperand(OpNoForForwarding).isKill())
+ ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg();
+
// We know that the MI and DefMI both meet the pattern, and that
// the Imm also meets the requirement of the new Imm-form.
// It is safe to do the transformation now.
@@ -3327,7 +3432,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
// Otherwise, it is Constant Pool Index(CPI) or Global,
// which is relocation in fact. We need to replace the special zero
// register with ImmMO.
- // Before that, we need to fixup the target flags for imm.
+ // Before that, we need to fixup the target flags for imm.
// For some reason, we fail to set the flag for the ImmMO if it is a CPI.
if (DefMI.getOpcode() == PPC::ADDItocL)
ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
@@ -3354,6 +3459,22 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
// Update the opcode.
MI.setDesc(get(III.ImmOpcode));
+ // Fix up the killed/dead flags after the transformation.
+ // Pattern 1:
+ // x = ADD KilledFwdFeederReg, imm
+ // n = opn KilledFwdFeederReg(killed), regn
+ // y = XOP 0, x
+ // Pattern 2:
+ // x = ADD reg(killed), imm
+ // y = XOP 0, x
+ if (IsFwdFeederRegKilled || RegMO->isKill())
+ fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ // Pattern 3:
+ // ForwardKilledOperandReg = ADD reg, imm
+ // y = XOP 0, ForwardKilledOperandReg(killed)
+ if (ForwardKilledOperandReg != ~0U)
+ fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -3363,6 +3484,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
const ImmInstrInfo &III,
unsigned ConstantOpNo,
+ MachineInstr &DefMI,
int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
@@ -3401,6 +3523,11 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
return false;
}
+ // Record killed info in case a fixup is needed after the transformation.
+ unsigned ForwardKilledOperandReg = ~0U;
+ if (PostRA && MI.getOperand(ConstantOpNo).isKill())
+ ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
+
unsigned Opc = MI.getOpcode();
bool SpecialShift32 =
Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
@@ -3483,6 +3610,13 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
}
}
}
+
+ // Fix up the killed/dead flags after the transformation.
+ // Pattern:
+ // ForwardKilledOperandReg = LI imm
+ // y = XOP reg, ForwardKilledOperandReg(killed)
+ if (ForwardKilledOperandReg != ~0U)
+ fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
return true;
}
@@ -3784,3 +3918,133 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
}
return false;
}
+
+bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
+ return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
+}
+
+bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const {
+ MachineBasicBlock *LoopEnd = L.getBottomBlock();
+ MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
+ // We really "analyze" only CTR loops right now.
+ if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
+ IndVarInst = nullptr;
+ CmpInst = &*I;
+ return false;
+ }
+ return true;
+}
+
+MachineInstr *
+PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+
+ unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
+
+ // The loop set-up instruction should be in the preheader.
+ for (auto &I : PreHeader.instrs())
+ if (I.getOpcode() == LOOPi)
+ return &I;
+ return nullptr;
+}
+
+unsigned PPCInstrInfo::reduceLoopCount(
+ MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
+ MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
+ unsigned MaxIter) const {
+ // We expect a hardware loop currently. This means that IndVar is set
+ // to null, and the compare is the loop-ending BDNZ/BDNZ8 instruction.
+ assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
+ MachineFunction *MF = MBB.getParent();
+ DebugLoc DL = Cmp.getDebugLoc();
+ MachineInstr *Loop = findLoopInstr(PreHeader);
+ if (!Loop)
+ return 0;
+ unsigned LoopCountReg = Loop->getOperand(0).getReg();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+
+ if (!LoopCount)
+ return 0;
+ // If the loop trip count is a compile-time value, then just change the
+ // value.
+ if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
+ int64_t Offset = LoopCount->getOperand(1).getImm();
+ if (Offset <= 1) {
+ LoopCount->eraseFromParent();
+ Loop->eraseFromParent();
+ return 0;
+ }
+ LoopCount->getOperand(1).setImm(Offset - 1);
+ return Offset - 1;
+ }
+
+ // The loop trip count is a run-time value.
+ // We need to subtract one from the trip count,
+ // and insert a branch later to check if we're done with the loop.
+
+ // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by 1,
+ // we don't need to generate anything here.
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(
+ Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
+ return LoopCountReg;
+}
+
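The compile-time branch of reduceLoopCount above is pure bookkeeping on the LI immediate. A hedged standalone sketch of that decision, with a plain integer standing in for the LI instruction (helper name hypothetical):

#include <optional>

// Mirrors reduceLoopCount's constant path: peeling one iteration either
// deletes the loop set-up entirely (the count would reach zero) or rewrites
// the immediate in place. nullopt models "LoopCount and Loop erased".
std::optional<long long> reduceKnownTripCount(long long TripCount) {
  if (TripCount <= 1)
    return std::nullopt; // nothing left to run after peeling
  return TripCount - 1;  // the LI immediate becomes TripCount - 1
}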
+// Return true if we can determine the base operand, byte offset and memory
+// width of an instruction. Width is the size of memory loaded/stored.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+ const MachineInstr &LdSt,
+ const MachineOperand *&BaseReg,
+ int64_t &Offset,
+ unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+ // Handle only loads/stores with base register followed by immediate offset.
+ if (LdSt.getNumExplicitOperands() != 3)
+ return false;
+ if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+ return false;
+
+ if (!LdSt.hasOneMemOperand())
+ return false;
+
+ Width = (*LdSt.memoperands_begin())->getSize();
+ Offset = LdSt.getOperand(1).getImm();
+ BaseReg = &LdSt.getOperand(2);
+ return true;
+}
+
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+ const MachineInstr &MIa, const MachineInstr &MIb,
+ AliasAnalysis * /*AA*/) const {
+ assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+ assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+ if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+ MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+ return false;
+
+ // Retrieve the base register, offset from the base register, and width.
+ // Width is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).
+ // If the base registers are identical, and the offset of the lower memory
+ // access plus its width does not reach the offset of the higher memory
+ // access, then the memory accesses are disjoint.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+ int64_t OffsetA = 0, OffsetB = 0;
+ unsigned int WidthA = 0, WidthB = 0;
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+ int LowOffset = std::min(OffsetA, OffsetB);
+ int HighOffset = std::max(OffsetA, OffsetB);
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+ }
+ return false;
+}
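The overlap test at the end of areMemAccessesTriviallyDisjoint reduces to a single comparison on byte intervals that share a base register. A self-contained sketch of that arithmetic (helper name hypothetical), with two quick checks:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Accesses [OffA, OffA + WidthA) and [OffB, OffB + WidthB) off the same base
// register are disjoint iff the lower one ends at or before the higher begins.
bool accessesDisjoint(int64_t OffA, unsigned WidthA,
                      int64_t OffB, unsigned WidthB) {
  int64_t Low = std::min(OffA, OffB);
  int64_t High = std::max(OffA, OffB);
  unsigned LowWidth = (Low == OffA) ? WidthA : WidthB;
  return Low + LowWidth <= High;
}

int main() {
  assert(accessesDisjoint(0, 8, 8, 8));  // adjacent doublewords: disjoint
  assert(!accessesDisjoint(0, 8, 4, 4)); // word inside a doubleword: overlap
  return 0;
}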
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 7ed558b835af..70fb757e8f1e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,9 +1,8 @@
//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
-#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -66,9 +64,6 @@ enum {
/// Shift count to bypass PPC970 flags
NewDef_Shift = 6,
- /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
- /// register (v0-v31).
- UseVSXReg = 0x1 << NewDef_Shift,
/// This instruction is an X-Form memory operation.
XFormMemOp = 0x1 << (NewDef_Shift+1)
};
@@ -129,12 +124,12 @@ class PPCInstrInfo : public PPCGenInstrInfo {
// If the inst has imm-form and one of its operand is produced by a LI,
// put the imm into the inst directly and remove the LI if possible.
bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, int64_t Imm) const;
+ unsigned ConstantOpNo, MachineInstr &DefMI,
+ int64_t Imm) const;
// If the inst has imm-form and one of its operand is produced by an
// add-immediate, try to transform it when possible.
bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo,
- MachineInstr &DefMI,
+ unsigned ConstantOpNo, MachineInstr &DefMI,
bool KillDefMI) const;
// Try to find that, if the instruction 'MI' contains any operand that
// could be forwarded from some inst that feeds it. If yes, return the
@@ -159,8 +154,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
int64_t &Imm) const;
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
const MachineInstr &DefMI,
- const MachineInstr &MI,
- bool KillDefMI) const;
+ const MachineInstr &MI, bool KillDefMI,
+ bool &IsFwdFeederRegKilled) const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
virtual void anchor();
@@ -362,6 +357,22 @@ public:
unsigned SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
+
+ /// Return true if we can determine the base operand and byte offset of an
+ /// instruction, as well as the memory width. Width is the size of memory
+ /// that is being loaded/stored (e.g. 1, 2, 4, 8).
+ bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Return true if two MIs access different memory addresses, and false
+ /// otherwise.
+ bool
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
+ AliasAnalysis *AA = nullptr) const override;
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
@@ -412,6 +423,18 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
+
+ /// Fix up the killed/dead flags for register \p RegNo between instructions
+ /// [\p StartMI, \p EndMI]. Some PostRA transformations may violate the
+ /// register killed/dead flag semantics; this function can be called to fix
+ /// them up. Before calling this function,
+ /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
+ /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
+ /// and the only possible definition of \p RegNo is \p StartMI or \p EndMI.
+ /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in the
+ /// same basic block.
+ void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+ unsigned RegNo) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
int64_t Imm) const;
@@ -429,14 +452,55 @@ public:
/// operands).
static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
unsigned OpNo) {
- if (Desc.TSFlags & PPCII::UseVSXReg) {
- if (isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ int16_t regClass = Desc.OpInfo[OpNo].RegClass;
+ switch (regClass) {
+ // We store F0-F31, VF0-VF31 in the MCOperand, but they should be F0-F31,
+ // VSX32-VSX63 during encoding/disassembling
+ case PPC::VSSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ if (isVFRegister(Reg))
+ return PPC::VSX32 + (Reg - PPC::VF0);
+ break;
+ // We store VSL0-VSL31, V0-V31 in the MCOperand, but they should be
+ // VSL0-VSL31, VSX32-VSX63 during encoding/disassembling
+ case PPC::VSRCRegClassID:
+ if (isVRRegister(Reg))
+ return PPC::VSX32 + (Reg - PPC::V0);
+ break;
+ // Other register classes don't need mapping
+ default:
+ break;
}
return Reg;
}
+
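The renumbering above relies on the upper 32 VSX registers aliasing the VF/Altivec registers. A toy model of just the arithmetic (the enum values are hypothetical placeholders, chosen only to be disjoint):

// Hypothetical register numbers; only the offset arithmetic mirrors the code.
enum : unsigned { VF0 = 100, V0 = 200, VSX32 = 300 };

unsigned toVSXEncoding(unsigned Reg) {
  if (Reg >= VF0 && Reg < VF0 + 32)
    return VSX32 + (Reg - VF0); // VF0-VF31 encode as VSX32-VSX63
  if (Reg >= V0 && Reg < V0 + 32)
    return VSX32 + (Reg - V0);  // V0-V31 encode as VSX32-VSX63
  return Reg;                   // other register classes keep their numbers
}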
+ /// Check whether \p Opcode is BDNZ (decrement CTR and branch if still nonzero).
+ bool isBDNZ(unsigned Opcode) const;
+
+ /// Find the hardware loop instruction used to set up the specified loop.
+ /// On PPC, we have two instructions used to set up the hardware loop
+ /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
+ /// instructions to indicate the end of a loop.
+ MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
+
+ /// Analyze the loop code to find the loop induction variable and the compare
+ /// used to compute the number of iterations. Currently, we only analyze loops
+ /// controlled by hardware loop instructions. In this case, the induction
+ /// variable instruction is null. For all other cases, this function returns
+ /// true, which means we're unable to analyze it. \p IndVarInst and \p CmpInst
+ /// are updated when we can analyze the read-only loop \p L; otherwise,
+ /// nothing is changed.
+ bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const override;
+ /// Generate code to reduce the loop iteration count by one and check if the
+ /// loop is finished. Return the value/register of the new loop count. We need
+ /// this function when peeling off one or more iterations of a loop. This
+ /// function assumes the last iteration is peeled first.
+ unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
+ MachineInstr *IndVar, MachineInstr &Cmp,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts,
+ unsigned Iter, unsigned MaxIter) const override;
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 77aa4fe3d415..c313337047f0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,9 +1,8 @@
//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -231,6 +230,18 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTCisSameAs<1,2>]>,
[]>;
+def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
+ SDTypeProfile<1, 2,
+ [SDTCisVT<0, f64>, SDTCisVT<1,i32>,
+ SDTCisVT<2,i32>]>,
+ []>;
+
+def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE",
+ SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, f64>,
+ SDTCisPtrTy<2>]>,
+ []>;
+
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -458,6 +469,17 @@ def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
return !isOffsetMultipleOf(N, 16);
}]>;
+// PatFrag for a binary operation whose operands are both non-constant.
+class BinOpWithoutSImm16Operand<SDNode opcode> :
+ PatFrag<(ops node:$left, node:$right), (opcode node:$left, node:$right), [{
+ int16_t Imm;
+ return !isIntS16Immediate(N->getOperand(0), Imm)
+ && !isIntS16Immediate(N->getOperand(1), Imm);
+}]>;
+
+def add_without_simm16 : BinOpWithoutSImm16Operand<add>;
+def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
+
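The guard in this PatFrag is an immediate-range test; the real isIntS16Immediate also peels constant nodes off the DAG, but the range check itself is just (hedged sketch, helper name hypothetical):

#include <cstdint>

// True iff V fits the signed 16-bit immediate field that addi/mulli (and the
// other D-form arithmetic instructions) can encode directly.
bool fitsSImm16(int64_t V) { return V >= INT16_MIN && V <= INT16_MAX; }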
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -546,10 +568,6 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
-def crrc0 : RegisterOperand<CRRC0> {
- let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-
def PPCRegSPERCAsmOperand : AsmOperandClass {
let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
}
@@ -883,11 +901,24 @@ def pred : Operand<OtherVT> {
}
// Define PowerPC specific addressing mode.
-def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
-def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+
+// d-form
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
+// ds-form
+def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+// dq-form
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+
+// The forms below are all x-form addressing modes; we use three different
+// ones so we can make an accurate check for x-form instructions in ISEL.
+// x-form addressing mode whose associated displacement form is D.
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; // "stbx"
+// x-form addressing mode whose associated displacement form is DS.
+def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4", [], []>; // "stdx"
+// x-form addressing mode whose associated displacement form is DQ.
+def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16", [], []>; // "stxvx"
+
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
-def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.
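What separates the three associated displacement forms is offset encodability: D-form takes any signed 16-bit displacement, DS-form needs the low two bits clear, and DQ-form the low four. A sketch of those checks (helper names hypothetical):

#include <cstdint>

bool fitsS16(int64_t D) { return D >= -32768 && D <= 32767; }

// D-form (e.g. stb): any signed 16-bit displacement.
bool validDForm(int64_t D) { return fitsS16(D); }
// DS-form (e.g. std): the displacement must be a multiple of 4.
bool validDSForm(int64_t D) { return fitsS16(D) && (D & 3) == 0; }
// DQ-form (e.g. stxv): the displacement must be a multiple of 16.
bool validDQForm(int64_t D) { return fitsS16(D) && (D & 15) == 0; }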
@@ -1311,6 +1342,15 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
}
+// Set the float rounding mode.
+let Uses = [RM], Defs = [RM] in {
+def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
+ "#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
+
+def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
+ "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+}
+
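These pseudos back the int_ppc_setrnd intrinsic; at the source level it surfaces (assuming clang's builtin of the same vintage) as __builtin_setrnd, which sets the RN bits and returns the previous FPSCR contents as a double:

// Hedged usage sketch; compiles only with a PowerPC clang that provides
// __builtin_setrnd. RN encodings: 0 = nearest, 1 = toward zero,
// 2 = toward +infinity, 3 = toward -infinity.
double setRoundTowardZero() {
  return __builtin_setrnd(1); // the old FPSCR comes back as a double
}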
let Defs = [LR] in
def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
@@ -1437,6 +1477,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
def BCLn : BForm_4<16, 4, 0, 1, (outs),
(ins crbitrc:$bi, condbrtarget:$dst),
"bcl 4, $bi, $dst">;
+ def BL_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
}
}
let Uses = [CTR, RM] in {
@@ -2514,6 +2557,7 @@ def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
[(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
let isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
"creqv $dst, $dst, $dst", IIC_BrCR,
[(set i1:$dst, 1)]>;
@@ -2521,6 +2565,7 @@ def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
"crxor $dst, $dst, $dst", IIC_BrCR,
[(set i1:$dst, 0)]>;
+}
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
@@ -2568,7 +2613,7 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
-let Pattern = [(int_ppc_mtctr i32:$rS)] in
+let Pattern = [(int_set_loop_iterations i32:$rS)] in
def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
"mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -2995,9 +3040,16 @@ def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
// Calls
def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
(BL tglobaladdr:$dst)>;
+
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+// Calls for AIX only
+def : Pat<(PPCcall (i32 mcsym:$dst)),
+ (BL mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
+ (BL_NOP mcsym:$dst)>;
+
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -4073,6 +4125,10 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+let Defs = [CR0] in
+def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+ "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT;
+
def TLBIA : XForm_0<31, 370, (outs), (ins),
"tlbia", IIC_SprTLBIA, []>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index ef589ad01fd7..d67041d46d9f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1,9 +1,8 @@
//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 9f5891a45f22..935c3044ae47 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -1,9 +1,8 @@
//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -512,7 +511,7 @@ def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src),
def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
"evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>;
-def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
+def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB),
"evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>;
def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
"evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>;
@@ -887,4 +886,14 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
(SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
(SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+
+def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)),
+ (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>;
+
+def : Pat<(i32 (PPCextract_spe f64:$rA, 1)),
+ (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>;
+def : Pat<(i32 (PPCextract_spe f64:$rA, 0)),
+ (i32 (EXTRACT_SUBREG $rA, sub_32))>;
+
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0f073388dc74..07f38a61d098 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1,9 +1,8 @@
//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -54,6 +53,15 @@ def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
}
+
+def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
+]>;
+
// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -85,6 +93,10 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
+def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
+def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {
@@ -124,7 +136,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let UseVSXReg = 1 in {
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
@@ -841,12 +852,12 @@ let Uses = [RM] in {
"xxlxor $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
} // isCommutable
- let isCodeGenOnly = 1 in
- def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+
+ let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+ def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set v4i32:$XT, (v4i32 immAllZerosV))]>;
-
- let isCodeGenOnly = 1 in {
def XXLXORdpz : XX3Form_SetZero<60, 154,
(outs vsfrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
@@ -895,11 +906,10 @@ let Uses = [RM] in {
(PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
let isCodeGenOnly = 1 in
def XXSPLTWs : XX2Form_2<60, 164,
- (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
+ (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // hasSideEffects
-} // UseVSXReg = 1
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
@@ -961,6 +971,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
(v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+ (and v4i32:$B, v4i32:$C))),
+ (v4i32 (XXSEL $A, $B, $C))>;
+
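The new pattern is the classic bitwise-select identity: per bit, take B where the mask C is set, else A. A scalar sketch of what xxsel computes lane-wise over the 128-bit register (function name hypothetical):

#include <cstdint>

// Matches the dag above: (A & ~C) | (B & C).
uint32_t bitSelect(uint32_t A, uint32_t B, uint32_t C) {
  return (A & ~C) | (B & C);
}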
let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -1063,6 +1077,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
+def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>;
+
// Loads.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -1176,6 +1192,15 @@ def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
(XXSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMINDP $src1, $src2))>;
+
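These patterns let fmaxnum/fminnum nodes select the VSX min/max instructions directly. At the source level such nodes typically arise from libm calls in a loop; a hedged illustration (whether this vectorizes to xvmaxsp depends on the optimizer and flags, e.g. -fno-math-errno):

#include <cmath>

// std::fmax over floats can lower to fmaxnum nodes; with the patterns above,
// the vectorized loop body can use xvmaxsp instead of per-element libm calls.
void vmax(float *Out, const float *A, const float *B, int N) {
  for (int I = 0; I < N; ++I)
    Out[I] = std::fmax(A[I], B[I]);
}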
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1248,7 +1273,7 @@ def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
- let isCommutable = 1, UseVSXReg = 1 in {
+ let isCommutable = 1 in {
def XXLEQV : XX3Form<60, 186,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxleqv $XT, $XA, $XB", IIC_VecGeneral,
@@ -1258,12 +1283,11 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xxlnand $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
v4i32:$XB)))]>;
- } // isCommutable, UseVSXReg
+ } // isCommutable
def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
(XXLEQV $A, $B)>;
- let UseVSXReg = 1 in {
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1312,7 +1336,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"#STIWX",
[(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
- } // UseVSXReg = 1
def : Pat<(f64 (extloadf32 xoaddr:$src)),
(COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
@@ -1342,7 +1365,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
(SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
- let UseVSXReg = 1 in {
// VSX Elementary Scalar FP arithmetic (SP)
let isCommutable = 1 in {
def XSADDSP : XX3Form<60, 0,
@@ -1354,7 +1376,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xsmulsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
} // isCommutable
-
+ def XSSUBSP : XX3Form<60, 8,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xssubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
def XSDIVSP : XX3Form<60, 24,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsdivsp $XT, $XA, $XB", IIC_FPDivS,
@@ -1374,10 +1399,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT), (ins vssrc:$XB),
"xsrsqrtesp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
- def XSSUBSP : XX3Form<60, 8,
- (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
- "xssubsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
// FMA Instructions
let BaseName = "XSMADDASP" in {
@@ -1470,7 +1491,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
- } // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
def : Pat<DWToSPExtractConv.El0SS1,
@@ -1514,10 +1534,22 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
(STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+ def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
} // AddedComplexity = 400
} // HasP8Vector
-let UseVSXReg = 1, AddedComplexity = 400 in {
+let AddedComplexity = 400 in {
let Predicates = [HasDirectMove] in {
// VSX direct move instructions
def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1525,7 +1557,7 @@ let Predicates = [HasDirectMove] in {
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in
- def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
"mfvsrd $rA, $XT", IIC_VecGeneral,
[]>,
Requires<[In64BitMode]>;
@@ -1557,7 +1589,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
[]>, Requires<[In64BitMode]>;
} // IsISA3_0, HasDirectMove
-} // UseVSXReg = 1
+} // AddedComplexity = 400
// We want to parse this from asm, but we don't want to emit this as it would
// be emitted with a VSX reg. So leave Emit = 0 here.
@@ -2415,7 +2447,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
list<dag> pattern>
: X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
- let UseVSXReg = 1 in {
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
list<dag> pattern>
@@ -2434,7 +2465,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
InstrItinClass itin, list<dag> pattern>
: XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
!strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
- } // UseVSXReg = 1
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
@@ -2482,69 +2512,70 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let isCommutable = 1 in {
def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
[(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
+ def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
+ [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
+ }
+ def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
+ [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
+ def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
+ [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
+ // Square-Root
+ def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
+ [(set f128:$vT, (fsqrt f128:$vB))]>;
+ // (Negative) Multiply-{Add/Subtract}
+ def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
+ [(set f128:$vT,
+ (fma f128:$vA, f128:$vB,
+ f128:$vTi))]>;
+ def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
+ [(set f128:$vT,
+ (fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi)))]>;
+ def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
+ [(set f128:$vT,
+ (fneg (fma f128:$vA, f128:$vB,
+ f128:$vTi)))]>;
+ def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
+ [(set f128:$vT,
+ (fneg (fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi))))]>;
+
+ let isCommutable = 1 in {
def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
[(set f128:$vT,
(int_ppc_addf128_round_to_odd
f128:$vA, f128:$vB))]>;
- def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
- [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
[(set f128:$vT,
(int_ppc_mulf128_round_to_odd
f128:$vA, f128:$vB))]>;
}
-
- def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
- [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
[(set f128:$vT,
(int_ppc_subf128_round_to_odd
f128:$vA, f128:$vB))]>;
- def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
- [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
[(set f128:$vT,
(int_ppc_divf128_round_to_odd
f128:$vA, f128:$vB))]>;
-
- // Square-Root
- def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
- [(set f128:$vT, (fsqrt f128:$vB))]>;
def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
[(set f128:$vT,
(int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
- // (Negative) Multiply-{Add/Subtract}
- def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- f128:$vTi))]>;
def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
[(set f128:$vT,
(int_ppc_fmaf128_round_to_odd
f128:$vA,f128:$vB,f128:$vTi))]>;
- def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi)))]>;
def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
[(set f128:$vT,
(int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
- def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- f128:$vTi)))]>;
def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
[(set f128:$vT,
(fneg (int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, f128:$vTi)))]>;
- def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi))))]>;
def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
[(set f128:$vT,
(fneg (int_ppc_fmaf128_round_to_odd
@@ -2572,8 +2603,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// DP/QP Compare Exponents
def XSCMPEXPDP : XX3Form_1<60, 59,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
- UseVSXReg;
+ "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
// DP Compare ==, >=, >, !=
@@ -2631,7 +2661,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
(f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
- let UseVSXReg = 1 in {
//===--------------------------------------------------------------------===//
// Round to Floating-Point Integer Instructions
@@ -2648,8 +2677,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
[(set v4f32:$XT,
(int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
- } // UseVSXReg = 1
-
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
// separate pattern so that it can convert the input register class from
// VRRC(v8i16) to VSRC.
@@ -2691,7 +2718,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Insert Exponent DP/QP
// XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
// vB NOTE: only vB.dword[0] is used, that's why we don't use
// X_VT5_VA5_VB5 form
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
@@ -2712,7 +2739,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v2i64 (XSXEXPQP $vA)), sub_64)))>;
// Vector Insert Word
- let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
def XXINSERTW :
XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
@@ -2726,7 +2752,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
(outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
"xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
- } // UseVSXReg = 1
// Vector Insert Exponent DP/SP
def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
@@ -2759,20 +2784,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
//===--------------------------------------------------------------------===//
// Test Data Class SP/DP/QP
- let UseVSXReg = 1 in {
def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
- } // UseVSXReg = 1
def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
(outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
"xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
// Vector Test Data Class SP/DP
- let UseVSXReg = 1 in {
def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
@@ -2783,7 +2805,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
[(set v2i64: $XT,
(int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
- } // UseVSXReg = 1
//===--------------------------------------------------------------------===//
@@ -2824,7 +2845,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Vector Splat Immediate Byte
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
- "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
+ "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
//===--------------------------------------------------------------------===//
// Vector/Scalar Load/Store Instructions
@@ -2834,7 +2855,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let mayLoad = 1, mayStore = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
- "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
+ "lxv $XT, $src", IIC_LdStLFD, []>;
// Load DWord
def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
"lxsd $vD, $src", IIC_LdStLFD, []>;
@@ -2847,7 +2868,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
: XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
+ !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
// Load as Integer Byte/Halfword & Zero Indexed
def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
@@ -2861,16 +2882,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Load Vector Indexed
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xaddr:$src))]>;
+ [(set v2f64:$XT, (load xaddrX16:$src))]>;
// Load Vector (Left-justified) with Length
def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvl $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>,
- UseVSXReg;
+ [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvll $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>,
- UseVSXReg;
+ [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
// Load Vector Word & Splat Indexed
def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
@@ -2881,7 +2900,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let mayStore = 1, mayLoad = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
- "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
+ "stxv $XT, $dst", IIC_LdStSTFD, []>;
// Store DWord
def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
"stxsd $vS, $dst", IIC_LdStSTFD, []>;
@@ -2893,7 +2912,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
: XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
+ !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
// Store as Integer Byte/Halfword Indexed
def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
@@ -2901,8 +2920,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
[(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
let isCodeGenOnly = 1 in {
- def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>;
- def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>;
+ def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
+ def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
}
// Store Vector Halfword*8/Byte*16 Indexed
@@ -2911,21 +2930,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Store Vector Indexed
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xaddr:$dst)]>;
+ [(store v2f64:$XT, xaddrX16:$dst)]>;
// Store Vector (Left-justified) with Length
def STXVL : XX1Form_memOp<31, 397, (outs),
(ins vsrc:$XT, memr:$dst, g8rc:$rB),
"stxvl $XT, $dst, $rB", IIC_LdStLoad,
[(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
- i64:$rB)]>,
- UseVSXReg;
+ i64:$rB)]>;
def STXVLL : XX1Form_memOp<31, 429, (outs),
(ins vsrc:$XT, memr:$dst, g8rc:$rB),
"stxvll $XT, $dst, $rB", IIC_LdStLoad,
[(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
- i64:$rB)]>,
- UseVSXReg;
+ i64:$rB)]>;
} // mayStore
let Predicates = [IsLittleEndian] in {
@@ -3045,24 +3062,24 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // IsLittleEndian, HasP9Vector
// D-Form Load/Store
- def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(f128 (quadwOffsetLoad iqaddr:$src)),
+ def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
(COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore f128:$rS, iqaddr:$dst),
+ def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
(STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
+ def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
(STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
(STXV $rS, memrix16:$dst)>;
@@ -3159,109 +3176,109 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv $S, xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
// Scalar stores of i16
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv $S, xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
} // IsBigEndian, HasP9Vector
let Predicates = [IsLittleEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv $S, xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
// Scalar stores of i16
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv $S, xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector
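// A sketch, not part of this patch (helper name is illustrative), of where the
// VSLDOI amounts above come from: STXSIBXv/STXSIHXv store from the right end
// of the left doubleword (byte 7), so the selected element is first rotated
// into that position.
static unsigned vsldoiShiftForByteStore(unsigned Elt, bool IsLittleEndian) {
  // A big-endian v16i8 element i sits at byte i; a little-endian one sits at
  // byte 15 - i. Rotating left by (Pos - 7) mod 16 bytes moves it to byte 7;
  // a result of 0 means no VSLDOI is emitted.
  unsigned Pos = IsLittleEndian ? 15 - Elt : Elt;
  return (Pos + 16 - 7) % 16;
}
// Halfwords follow the same logic with the two-byte element ending at byte 7,
// which reproduces the even shift amounts in the i16 patterns above.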
@@ -3273,53 +3290,97 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
"#DFLOADf32",
- [(set f32:$XT, (load ixaddr:$src))]>;
+ [(set f32:$XT, (load iaddrX4:$src))]>;
def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
"#DFLOADf64",
- [(set f64:$XT, (load ixaddr:$src))]>;
+ [(set f64:$XT, (load iaddrX4:$src))]>;
def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
"#DFSTOREf32",
- [(store f32:$XT, ixaddr:$dst)]>;
+ [(store f32:$XT, iaddrX4:$dst)]>;
def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
"#DFSTOREf64",
- [(store f64:$XT, ixaddr:$dst)]>;
+ [(store f64:$XT, iaddrX4:$dst)]>;
- def : Pat<(f64 (extloadf32 ixaddr:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
- (f32 (DFLOADf32 ixaddr:$src))>;
+ def : Pat<(f64 (extloadf32 iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+ def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
+ (f32 (DFLOADf32 iaddrX4:$src))>;
+ def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+ def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
let AddedComplexity = 400 in {
// The following pseudoinstructions are used to ensure the utilization
// of all 64 VSX registers.
let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
(v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
(v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+ (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
(v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
(v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
- }
+ (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ iaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ } // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
- (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
- (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
- }
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ } // IsBigEndian, HasP9Vector
}
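// A sketch, not part of this patch (helper name is illustrative): sub_64 names
// the left doubleword of a VSX register, so only one v2i64/v2f64 element can
// be stored directly; the other is first swapped in with XXPERMDI $A, $A, 2,
// exactly as the endian-mirrored pattern pairs above do.
static bool extractNeedsXXPERMDI(unsigned Elt, bool IsLittleEndian) {
  // Big-endian keeps element 0 in the left doubleword; little-endian keeps
  // element 1 there.
  unsigned DirectElt = IsLittleEndian ? 1 : 0;
  return Elt != DirectElt;
}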
let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3455,14 +3516,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // IsLittleEndian, HasP9Vector
// Convert (Un)Signed DWord in memory -> QP
- def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
- (f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
- def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
- (f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
- (f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
- (f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
// Convert Unsigned HWord in memory -> QP
def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
@@ -3483,13 +3544,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Instructions for store(fptosi).
// The 8-byte version is repeated here due to availability of D-Form STXSD.
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
(STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- xaddr:$dst)>;
+ xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
(STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- ixaddr:$dst)>;
+ iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
(STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
@@ -3500,11 +3561,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
(STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
- (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
(STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
@@ -3514,13 +3575,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Instructions for store(fptoui).
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
(STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- xaddr:$dst)>;
+ xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
(STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- ixaddr:$dst)>;
+ iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
(STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
@@ -3531,11 +3592,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
(STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
- (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
(STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
@@ -3668,13 +3729,13 @@ def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToLongLoadP9 {
- dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A)))));
}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToULongLoadP9 {
- dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A)))));
}
def FltToLong {
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
@@ -3704,13 +3765,13 @@ def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
def DblToIntLoadP9 {
- dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A)))));
}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
def DblToUIntLoadP9 {
- dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A)))));
}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@@ -3834,8 +3895,38 @@ let AddedComplexity = 400 in {
def : Pat<DWToSPExtractConv.BVS,
(v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
(XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+ // Elements in a register on a BE system are in order <0, 1, 2, 3>.
+ // The store instructions store the second word from the left.
+ // So to align element zero, we need to modulo-left-shift by 3 words.
+ // Similar logic applies for elements 2 and 3.
+ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ }
}
+ let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3871,8 +3962,38 @@ let AddedComplexity = 400 in {
def : Pat<DWToSPExtractConv.BVS,
(v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
(XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+ // Elements in a register on a LE system are in order <3, 2, 1, 0>.
+ // The store instructions store the second word from the left.
+ // So to align element 3, we need to modulo-left-shift by 3 words.
+ // Similar logic applies for elements 0 and 1.
+ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ }
}
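// A sketch, not part of this patch (helper name is illustrative), covering
// both foreach blocks above: STIWX stores the second word from the left
// (word 1), a big-endian v4i32 element e lives in word e, and a little-endian
// one lives in word 3 - e.
static unsigned xxsldwiShiftForWordStore(unsigned Elt, bool IsLittleEndian) {
  unsigned Pos = IsLittleEndian ? 3 - Elt : Elt;
  // Rotate left so the element lands in word 1; a result of 0 means the plain
  // STIWX pattern without XXSLDWI applies.
  return (Pos + 4 - 1) % 4;
}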
+ let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3969,17 +4090,17 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+ (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+ (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 ixaddr:$A),
+ (DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 ixaddr:$A),
+ (DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 0b57dd9b618d..4d45d96d4479 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -1,9 +1,8 @@
//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -65,12 +64,6 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
-namespace llvm {
-
- void initializePPCLoopPreIncPrepPass(PassRegistry&);
-
-} // end namespace llvm
-
namespace {
class PPCLoopPreIncPrep : public FunctionPass {
@@ -338,7 +331,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
!LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
- LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
if (LoopPredecessor)
MadeChange = true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index e731c0bc0c23..027e6bd1ba06 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,16 +110,16 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PLT;
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
const TargetMachine &TM = Printer.TM;
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
- // -msecure-plt option works only in PIC mode. If secure plt mode
- // is on add 32768 to symbol.
+ // If -msecure-plt -fPIC, add 32768 to symbol.
if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
+ M->getPICLevel() == PICLevel::BigPIC &&
MO.getTargetFlags() == PPCII::MO_PLT)
- Expr = MCBinaryExpr::createAdd(Expr,
- MCConstantExpr::create(32768, Ctx),
- Ctx);
+ Expr =
+ MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(32768, Ctx), Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::createAdd(Expr,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 0068df19f0c8..446246358e96 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1,9 +1,8 @@
//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -22,9 +21,12 @@
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -38,6 +40,7 @@ using namespace llvm;
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
"Number of functions with multiple TOC saves that must be kept");
+STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
@@ -48,6 +51,10 @@ STATISTIC(NumFunctionsEnteredInMIPeephole,
STATISTIC(NumFixedPointIterations,
"Number of fixed-point iterations converting reg-reg instructions "
"to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+ "Number of pairs of rotate left, clear left/right collapsed");
+STATISTIC(NumEXTSWAndSLDICombined,
+ "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -83,6 +90,9 @@ struct PPCMIPeephole : public MachineFunctionPass {
private:
MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ MachineBlockFrequencyInfo *MBFI;
+ uint64_t EntryFreq;
// Initialize class variables.
void initialize(MachineFunction &MFParm);
@@ -93,6 +103,8 @@ private:
// Perform peepholes.
bool eliminateRedundantCompare(void);
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
+ bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
+ bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
@@ -100,7 +112,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -118,6 +134,9 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ EntryFreq = MBFI->getEntryFreq();
TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
LLVM_DEBUG(MF->dump());
@@ -198,6 +217,30 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
void PPCMIPeephole::UpdateTOCSaves(
std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
+ assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ "TOC-save removal only supported on ELFv2");
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+
+ MachineBasicBlock *Entry = &MF->front();
+ uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+
+  // If the block in which the TOC save resides post-dominates Entry, or is
+  // hotter than entry (keep in mind that early MachineLICM has already run,
+  // so the TOC save won't be hoisted), we can just do the save in the
+  // prologue.
+ if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
+ FI->setMustSaveTOC(true);
+
+ // If we are saving the TOC in the prologue, all the TOC saves can be removed
+ // from the code.
+ if (FI->mustSaveTOC()) {
+ for (auto &TOCSave : TOCSaves)
+ TOCSave.second = false;
+ // Add new instruction to map.
+ TOCSaves[MI] = false;
+ return;
+ }
+
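// Worked example, not part of this patch (numbers invented): with an entry
// frequency of 8, a TOC save sitting in a loop body of frequency 64 is hotter
// than entry and is moved to the prologue, while a save in a cold error path
// of frequency 1 that does not post-dominate entry keeps its local placement.
#include <cstdint>

static bool saveTOCInPrologue(uint64_t BlockFreq, uint64_t EntryFreq,
                              bool PostDominatesEntry) {
  // Mirrors the test above: saveTOCInPrologue(64, 8, false) is true,
  // saveTOCInPrologue(1, 8, false) is false.
  return BlockFreq > EntryFreq || PostDominatesEntry;
}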
bool Keep = true;
for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
MachineInstr *CurrInst = It->first;
@@ -758,6 +801,11 @@ bool PPCMIPeephole::simplifyCode(void) {
NumOptADDLIs++;
break;
}
+ case PPC::RLDICR: {
+ Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
+ combineSEXTAndSHL(MI, ToErase);
+ break;
+ }
}
}
@@ -771,6 +819,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// Eliminate all the TOC save instructions which are redundant.
Simplified |= eliminateRedundantTOCSaves(TOCSaves);
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ if (FI->mustSaveTOC())
+ NumTOCSavesInPrologue++;
+
// We try to eliminate redundant compare instruction.
Simplified |= eliminateRedundantCompare();
@@ -1275,10 +1327,136 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
return Simplified;
}
+// We miss the opportunity to emit an RLDIC when lowering jump tables
+// since ISEL sees only a single basic block. When selecting, the clear
+// and shift left will be in different blocks.
+bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
+ if (MI.getOpcode() != PPC::RLDICR)
+ return false;
+
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() != PPC::RLDICL)
+ return false;
+
+ MachineOperand MOpSHSrc = SrcMI->getOperand(2);
+ MachineOperand MOpMBSrc = SrcMI->getOperand(3);
+ MachineOperand MOpSHMI = MI.getOperand(2);
+ MachineOperand MOpMEMI = MI.getOperand(3);
+ if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
+ MOpMEMI.isImm()))
+ return false;
+
+ uint64_t SHSrc = MOpSHSrc.getImm();
+ uint64_t MBSrc = MOpMBSrc.getImm();
+ uint64_t SHMI = MOpSHMI.getImm();
+ uint64_t MEMI = MOpMEMI.getImm();
+ uint64_t NewSH = SHSrc + SHMI;
+ uint64_t NewMB = MBSrc - SHMI;
+ if (NewMB > 63 || NewSH > 63)
+ return false;
+
+ // The bits cleared with RLDICL are [0, MBSrc).
+ // The bits cleared with RLDICR are (MEMI, 63].
+ // After the sequence, the bits cleared are:
+  // [0, MBSrc-SHMI) and (MEMI, 63].
+ //
+ // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
+ if ((63 - NewSH) != MEMI)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Converting pair: ");
+ LLVM_DEBUG(SrcMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ MI.setDesc(TII->get(PPC::RLDIC));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ MI.getOperand(2).setImm(NewSH);
+ MI.getOperand(3).setImm(NewMB);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ NumRotatesCollapsed++;
+ return true;
+}
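// Worked example, not part of this patch (register numbers are illustrative):
// a 32-bit jump-table index that is zero-extended and then scaled by 4
// selects as
//   rldicl r4, r3, 0, 32   ; SHSrc = 0,  MBSrc = 32
//   rldicr r5, r4, 2, 61   ; SHMI  = 2,  MEMI  = 61
// giving NewSH = 2 and NewMB = 30 with 63 - NewSH == MEMI, so the pair
// collapses to
//   rldic  r5, r3, 2, 30
// The checker below restates the guard used by the function above.
#include <cstdint>

static bool canFoldToRLDIC(uint64_t SHSrc, uint64_t MBSrc, uint64_t SHMI,
                           uint64_t MEMI) {
  uint64_t NewSH = SHSrc + SHMI;
  uint64_t NewMB = MBSrc - SHMI; // wraps past 63 if SHMI > MBSrc
  return NewSH <= 63 && NewMB <= 63 && (63 - NewSH) == MEMI;
}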
+
+// For a case like the following in LLVM IR
+// entry:
+// %iconv = sext i32 %index to i64
+//   br i1 undef, label %true, label %false
+// true:
+// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
+// ...
+// PPCISelLowering::combineSHL fails to combine them, because sext and shl
+// are in different BBs during instruction selection. We can do a peephole
+// optimization to combine these two instructions into extswsli after
+// instruction selection.
+bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
+ MachineInstr *&ToErase) {
+ if (MI.getOpcode() != PPC::RLDICR)
+ return false;
+
+ if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
+ return false;
+
+ assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
+
+ MachineOperand MOpSHMI = MI.getOperand(2);
+ MachineOperand MOpMEMI = MI.getOperand(3);
+ if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
+ return false;
+
+ uint64_t SHMI = MOpSHMI.getImm();
+ uint64_t MEMI = MOpMEMI.getImm();
+ if (SHMI + MEMI != 63)
+ return false;
+
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() != PPC::EXTSW &&
+ SrcMI->getOpcode() != PPC::EXTSW_32_64)
+ return false;
+
+ // If the register defined by extsw has more than one use, combination is not
+ // needed.
+ if (!MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Combining pair: ");
+ LLVM_DEBUG(SrcMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ MachineInstr *NewInstr =
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+ SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
+ : TII->get(PPC::EXTSWSLI_32_64),
+ MI.getOperand(0).getReg())
+ .add(SrcMI->getOperand(1))
+ .add(MOpSHMI);
+ (void)NewInstr;
+
+ LLVM_DEBUG(dbgs() << "TO: ");
+ LLVM_DEBUG(NewInstr->dump());
+ ++NumEXTSWAndSLDICombined;
+ ToErase = &MI;
+  // SrcMI, which is extsw, is of no use now; erase it.
+ SrcMI->eraseFromParent();
+ return true;
+}
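// Worked example, not part of this patch (register numbers are illustrative):
// for the IR above with a GEP on i32 (a shift by 2), instruction selection
// produces
//   extsw  r4, r3
//   rldicr r5, r4, 2, 61   ; SHMI + MEMI == 63, i.e. a plain shl
// and the peephole rewrites the pair to the single ISA 3.0 instruction
//   extswsli r5, r3, 2
#include <cstdint>

static bool rldicrIsPureShiftLeft(uint64_t SHMI, uint64_t MEMI) {
  // RLDICR with ME == 63 - SH keeps exactly the bits a shl would keep.
  return SHMI + MEMI == 63;
}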
+
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 3923417257e8..2f65d6a2855b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 8a3f50aa9565..dfae19804d94 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,6 +44,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
+ /// prologue of the function. This is typically the case when there are
+ /// indirect calls in the function and it is more profitable to save the
+ /// TOC pointer in the prologue than in the block(s) containing the call(s).
+ bool MustSaveTOC = false;
+
/// Do we have to disable shrink-wrapping? This has to be set if we emit any
/// instructions that clobber LR in the entry block because discovering this
/// in PEI is too late (happens after shrink-wrapping);
@@ -152,6 +157,9 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setMustSaveTOC(bool U) { MustSaveTOC = U; }
+ bool mustSaveTOC() const { return MustSaveTOC; }
+
/// We certainly don't want to shrink wrap functions if we've emitted a
/// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
/// has to go into the entry block.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
new file mode 100644
index 000000000000..a38c8f475066
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineScheduler.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
+                         cl::desc("Disable scheduling addi instruction "
+                                  "before load for ppc"), cl::Hidden);
+
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone) const {
+ if (DisableAddiLoadHeuristic)
+ return false;
+
+ auto isADDIInstr = [&] (const MachineInstr &Inst) {
+ return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
+ };
+
+ SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
+ SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
+ if (isADDIInstr(*FirstCand.SU->getInstr()) &&
+ SecondCand.SU->getInstr()->mayLoad()) {
+ TryCand.Reason = Stall;
+ return true;
+ }
+ if (FirstCand.SU->getInstr()->mayLoad() &&
+ isADDIInstr(*SecondCand.SU->getInstr())) {
+ TryCand.Reason = NoCand;
+ return true;
+ }
+
+ return false;
+}
+
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+ if (!Cand.isValid() || !Zone)
+ return;
+
+  // Apply the PowerPC-specific heuristic only when TryCand isn't selected,
+  // or is selected merely by node order.
+ if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+ return;
+
+  // There are some benefits to scheduling the ADDI before the load to hide
+  // the latency, as RA may create a true dependency between the load and
+  // the addi.
+ if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+ return;
+}
+
+void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
+ // Custom PPC PostRA specific behavior here.
+ PostGenericScheduler::enterMBB(MBB);
+}
+
+void PPCPostRASchedStrategy::leaveMBB() {
+ // Custom PPC PostRA specific behavior here.
+ PostGenericScheduler::leaveMBB();
+}
+
+void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
+ // Custom PPC PostRA specific initialization here.
+ PostGenericScheduler::initialize(Dag);
+}
+
+SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
+ // Custom PPC PostRA specific scheduling here.
+ return PostGenericScheduler::pickNode(IsTopNode);
+}
+
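// A sketch, not part of this patch: a strategy only takes effect once the
// target pass config hands it to the machine scheduler. The wiring below is
// an assumption for illustration (the real hook lives in PPCTargetMachine.cpp,
// outside this section); PPCPassConfig and the override are assumed names.
#include "PPCMachineScheduler.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include <memory>

llvm::ScheduleDAGInstrs *
PPCPassConfig::createMachineScheduler(llvm::MachineSchedContext *C) const {
  // ScheduleDAGMILive drives pre-RA list scheduling with the custom strategy.
  return new llvm::ScheduleDAGMILive(
      C, std::make_unique<llvm::PPCPreRASchedStrategy>(C));
}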
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
new file mode 100644
index 000000000000..93532d9545a6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -0,0 +1,49 @@
+//===- PPCMachineScheduler.h - Custom PowerPC MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom PowerPC MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for PowerPC pre-RA scheduling.
+class PPCPreRASchedStrategy : public GenericScheduler {
+public:
+ PPCPreRASchedStrategy(const MachineSchedContext *C) :
+ GenericScheduler(C) {}
+protected:
+ void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone) const override;
+private:
+ bool biasAddiLoadCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone) const;
+};
+
+/// A MachineSchedStrategy implementation for PowerPC post-RA scheduling.
+class PPCPostRASchedStrategy : public PostGenericScheduler {
+public:
+ PPCPostRASchedStrategy(const MachineSchedContext *C) :
+ PostGenericScheduler(C) {}
+
+protected:
+ void initialize(ScheduleDAGMI *Dag) override;
+ SUnit *pickNode(bool &IsTopNode) override;
+ void enterMBB(MachineBasicBlock *MBB) override;
+ void leaveMBB() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
index 8a1d68011c5f..d0d84efdbd20 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -1,9 +1,8 @@
//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
index d2a09f30c0f3..20b9efdc9df9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,9 +1,8 @@
//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4458b92ceb5e..d83c92276800 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -1,9 +1,8 @@
//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index 25b2b54cbe98..3a83cc27439c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -1,9 +1,8 @@
//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,10 +30,6 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
-namespace llvm {
- void initializePPCQPXLoadSplatPass(PassRegistry&);
-}
-
namespace {
struct PPCQPXLoadSplat : public MachineFunctionPass {
static char ID;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 173fc18b9ebf..8eaa6dfe2bf7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -1,9 +1,8 @@
//===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -49,10 +48,6 @@ STATISTIC(NumNotSplitChainCopies,
STATISTIC(NumNotSplitWrongOpcode,
"Number of blocks not split due to the wrong opcode.");
-namespace llvm {
- void initializePPCReduceCRLogicalsPass(PassRegistry&);
-}
-
/// Given a basic block \p Successor that potentially contains PHIs, this
/// function will look for any incoming values in the PHIs that are supposed to
/// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
@@ -171,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) {
: *ThisMBB->succ_begin();
MachineBasicBlock *NewBRTarget =
BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
- BranchProbability ProbToNewTarget =
- !BSI.MBPI ? BranchProbability::getUnknown()
- : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+  // It's impossible to know the precise branch probability after the split.
+  // But it still needs to be reasonable: the total probability to the
+  // original targets should not change.
+  // After the split, NewBRTarget will get two incoming edges. Assume P0 is
+  // the original branch probability to NewBRTarget, and P1 and P2 are the
+  // new branch probabilities to NewBRTarget after the split. If the two
+  // edge frequencies are the same, then
+  //   F * P1 = F * P0 / 2          ==>  P1 = P0 / 2
+  //   F * (1 - P1) * P2 = F * P1   ==>  P2 = P1 / (1 - P1)
+ BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
+ BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
+ ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+ ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+ if (BSI.MBPI) {
+ if (BSI.BranchToFallThrough) {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbOrigFallThrough.getCompl();
+ } else {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbOrigTarget.getCompl();
+ }
+ }
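// Worked example, not part of this patch: with P0 = 1/2 the formulas above
// give P1 = 1/4 for the new branch and P2 = (1/4) / (3/4) = 1/3 for the
// retargeted original branch, so the mass reaching NewBRTarget stays
// P1 + (1 - P1) * P2 = 1/4 + (3/4) * (1/3) = 1/2 = P0.
#include "llvm/Support/BranchProbability.h"

static void splitProbabilityExample() {
  llvm::BranchProbability P0(1, 2);
  llvm::BranchProbability P1 = P0 / 2;             // 1/4
  llvm::BranchProbability P2 = P1 / P1.getCompl(); // 1/3
  (void)P2;
}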
// Create a new basic block.
MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -185,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
// Move everything after SplitBefore into the new block.
NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
NewMBB->transferSuccessors(ThisMBB);
+ if (!ProbOrigTarget.isUnknown()) {
+ auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+ NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+ MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+ NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+ }
- // Add the two successors to ThisMBB. The probabilities come from the
- // existing blocks if available.
+ // Add the two successors to ThisMBB.
ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
- ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+ ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
// Add the branches to ThisMBB.
BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3d067aa8e621..12554ea8d079 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "PPCRegisterInfo.h"
-#include "PPC.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
@@ -71,6 +69,14 @@ StackPtrConst("ppc-stack-ptr-caller-preserved",
"caller preserved registers can be LICM candidates"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned>
+MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
+ cl::desc("Maximum search distance for definition of CR bit "
+ "spill on ppc"),
+ cl::Hidden, cl::init(100));
+
+static unsigned offsetMinAlignForOpcode(unsigned OpC);
+
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
: PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
TM.isPPC64() ? 0 : 1,
@@ -153,30 +159,39 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
return CSR_SRV464_TLS_PE_SaveList;
- if (Subtarget.hasSPE())
- return CSR_SVR432_SPE_SaveList;
-
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+ // Cold calling convention CSRs.
if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
- return TM.isPPC64()
- ? (Subtarget.hasAltivec()
- ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
- : CSR_SVR64_ColdCC_Altivec_SaveList)
- : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
- : CSR_SVR64_ColdCC_SaveList))
- : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList
- : CSR_SVR32_ColdCC_SaveList);
+ if (TM.isPPC64()) {
+ if (Subtarget.hasAltivec())
+ return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
+ : CSR_SVR64_ColdCC_Altivec_SaveList;
+ return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
+ : CSR_SVR64_ColdCC_SaveList;
+ }
+ // 32-bit targets.
+ if (Subtarget.hasAltivec())
+ return CSR_SVR32_ColdCC_Altivec_SaveList;
+ else if (Subtarget.hasSPE())
+ return CSR_SVR32_ColdCC_SPE_SaveList;
+ return CSR_SVR32_ColdCC_SaveList;
}
-
- return TM.isPPC64()
- ? (Subtarget.hasAltivec()
- ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
- : CSR_SVR464_Altivec_SaveList)
- : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
- : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
- : CSR_SVR432_SaveList);
+ // Standard calling convention CSRs.
+ if (TM.isPPC64()) {
+ if (Subtarget.hasAltivec())
+ return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList;
+ return SaveR2 ? CSR_SVR464_R2_SaveList
+ : CSR_SVR464_SaveList;
+ }
+ // 32-bit targets.
+ if (Subtarget.hasAltivec())
+ return CSR_SVR432_Altivec_SaveList;
+ else if (Subtarget.hasSPE())
+ return CSR_SVR432_SPE_SaveList;
+ return CSR_SVR432_SaveList;
}
const MCPhysReg *
@@ -221,18 +236,26 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_Darwin64_RegMask)
: (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
: CSR_Darwin32_RegMask);
+ if (Subtarget.isAIXABI()) {
+ assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
+ return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+ }
if (CC == CallingConv::Cold) {
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
- : CSR_SVR32_ColdCC_RegMask);
+ : (Subtarget.hasSPE()
+ ? CSR_SVR32_ColdCC_SPE_RegMask
+ : CSR_SVR32_ColdCC_RegMask));
}
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
: CSR_SVR464_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
- : CSR_SVR432_RegMask);
+ : (Subtarget.hasSPE()
+ ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
}
const uint32_t*
@@ -288,6 +311,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
}
+ // Always reserve r2 on AIX for now.
+ // TODO: Make r2 allocatable on AIX/XCOFF for some leaf functions.
+ if (Subtarget.isAIXABI())
+ markSuperRegs(Reserved, PPC::R2); // System-reserved register
+
// On PPC64, r13 is the thread pointer. Never allocate this register.
if (TM.isPPC64())
markSuperRegs(Reserved, PPC::R13);
@@ -316,6 +344,51 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCInstrInfo *InstrInfo = Subtarget.getInstrInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+
+ // If the callee saved info is invalid we have to default to true for safety.
+ if (!MFI.isCalleeSavedInfoValid())
+ return true;
+
+  // We will require the use of X-Forms if the frame is larger than what can
+  // be represented in the signed 16-bit immediate of a D-Form. If we need an
+  // X-Form then we need a register to hold the computed address offset.
+ unsigned FrameSize = MFI.getStackSize();
+  // A signed 16-bit displacement leaves 15 usable bits for a non-negative
+  // FrameSize.
+ if (FrameSize & ~0x7FFF)
+ return true;
+
+  // The callee-saved info is valid, so it can be traversed.
+  // Check for registers that need saving but have no load or store form
+  // taking an immediate address offset.
+ for (unsigned i = 0; i < Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ unsigned Reg = Info[i].getReg();
+
+ unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+ if (!MFI.isFixedObjectIndex(FrIdx)) {
+ // This is not a fixed object. If it requires alignment then we may still
+ // need to use the XForm.
+ if (offsetMinAlignForOpcode(Opcode) > 1)
+ return true;
+ }
+
+    // This is either:
+    // 1) A fixed frame index object, which we know is aligned, so as long
+    //    as we have a valid D-Form/DS-Form/DQ-Form (non X-Form) we don't
+    //    need to consider the alignment here.
+    // 2) A non-fixed object, in which case the previous check has already
+    //    established that the minimum required alignment is no more than 1.
+ if (InstrInfo->isXFormMemOp(Opcode))
+ return true;
+ }
+ return false;
+}
+
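[Annotation] A D-Form displacement is a signed 16-bit immediate, so only frames up to 0x7FFF bytes can be reached without an X-Form (register + register) access. A minimal standalone sketch of the size test above, assuming FrameSize is an unsigned byte count:

    #include <cstdint>

    // Mirrors "FrameSize & ~0x7FFF": any bit above bit 14 set means the
    // offset no longer fits the non-negative half of a signed 16-bit
    // D-Form displacement, so an X-Form (and a scavenged register) is
    // required.
    static bool frameNeedsXForm(uint64_t FrameSize) {
      return (FrameSize & ~UINT64_C(0x7FFF)) != 0;
    }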
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
@@ -664,6 +737,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const TargetRegisterInfo* TRI = Subtarget.getRegisterInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = TM.isPPC64();
@@ -673,27 +747,59 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
- // We need to move the CR field that contains the CR bit we are spilling.
- // The super register may not be explicitly defined (i.e. it can be defined
- // by a CR-logical that only defines the subreg) so we state that the CR
- // field is undef. Also, in order to preserve the kill flag on the CR bit,
- // we add it as an implicit use.
- BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ // Search up the BB to find the definition of the CR bit.
+ MachineBasicBlock::reverse_iterator Ins;
+ unsigned CRBitSpillDistance = 0;
+ for (Ins = MI; Ins != MBB.rend(); Ins++) {
+ // Definition found.
+ if (Ins->modifiesRegister(SrcReg, TRI))
+ break;
+ // Unable to find CR bit definition within maximum search distance.
+ if (CRBitSpillDistance == MaxCRBitSpillDist) {
+ Ins = MI;
+ break;
+ }
+ // Skip debug instructions when counting CR bit spill distance.
+ if (!Ins->isDebugInstr())
+ CRBitSpillDistance++;
+ }
+
+ // Unable to find the definition of the CR bit in the MBB.
+ if (Ins == MBB.rend())
+ Ins = MI;
+
+ // There is no need to extract the CR bit if its value is already known.
+ switch (Ins->getOpcode()) {
+ case PPC::CRUNSET:
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
+ .addImm(0);
+ break;
+ case PPC::CRSET:
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
+ .addImm(-32768);
+ break;
+ default:
+ // We need to move the CR field that contains the CR bit we are spilling.
+ // The super register may not be explicitly defined (i.e. it can be defined
+ // by a CR-logical that only defines the subreg) so we state that the CR
+ // field is undef. Also, in order to preserve the kill flag on the CR bit,
+ // we add it as an implicit use.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
.addReg(getCRFromCRBit(SrcReg), RegState::Undef)
.addReg(SrcReg,
RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
- // If the saved register wasn't CR0LT, shift the bits left so that the bit to
- // store is the first one. Mask all but that bit.
- unsigned Reg1 = Reg;
- Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
-
- // rlwinm rA, rA, ShiftBits, 0, 0.
- BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg1, RegState::Kill)
- .addImm(getEncodingValue(SrcReg))
- .addImm(0).addImm(0);
+ // If the saved register wasn't CR0LT, shift the bits left so that the bit
+ // to store is the first one. Mask all but that bit.
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ // rlwinm rA, rA, ShiftBits, 0, 0.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg))
+ .addImm(0).addImm(0);
+ }
addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
.addReg(Reg, RegState::Kill),
FrameIndex);
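[Annotation] The rewritten spill path walks backwards from the spill point: stop at the first instruction that modifies the CR bit, give up after MaxCRBitSpillDist non-debug instructions, and fall back to the MFOCRF sequence when no definition is found. A simplified sketch of that bounded search over a plain vector (Instr is a stand-in for MachineInstr):

    #include <vector>

    struct Instr { bool DefinesReg; bool IsDebug; };

    // Returns the index of the defining instruction, or SpillIdx itself
    // when the search fails (matching the "Ins = MI" fallback above).
    static int findCRBitDef(const std::vector<Instr> &Block, int SpillIdx,
                            unsigned MaxDist) {
      unsigned Dist = 0;
      for (int I = SpillIdx; I >= 0; --I) {
        if (Block[I].DefinesReg)
          return I;                    // definition found
        if (Dist == MaxDist)
          return SpillIdx;             // search budget exhausted
        if (!Block[I].IsDebug)
          ++Dist;                      // debug instrs don't count
      }
      return SpillIdx;                 // no definition in this block
    }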
@@ -826,9 +932,7 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
}
// If the offset must be a multiple of some value, return what that value is.
-static unsigned offsetMinAlign(const MachineInstr &MI) {
- unsigned OpC = MI.getOpcode();
-
+static unsigned offsetMinAlignForOpcode(unsigned OpC) {
switch (OpC) {
default:
return 1;
@@ -847,12 +951,21 @@ static unsigned offsetMinAlign(const MachineInstr &MI) {
case PPC::STXSD:
case PPC::STXSSP:
return 4;
+ case PPC::EVLDD:
+ case PPC::EVSTDD:
+ return 8;
case PPC::LXV:
case PPC::STXV:
return 16;
}
}
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
+ unsigned OpC = MI.getOpcode();
+ return offsetMinAlignForOpcode(OpC);
+}
+
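[Annotation] Splitting the switch into offsetMinAlignForOpcode lets requiresFrameIndexScavenging, which only has an opcode rather than a MachineInstr, share the same table; the SPE doubleword forms EVLDD/EVSTDD join it with an 8-byte requirement. The alignment gates the immediate form roughly like this (values illustrative):

    // An immediate (D/DS/DQ-Form) offset is only usable when it is a
    // multiple of the opcode's minimum alignment, as checked by
    // "(Offset % offsetMinAlign(MI)) == 0" in eliminateFrameIndex.
    static bool offsetUsable(long long Offset, unsigned MinAlign) {
      return Offset % MinAlign == 0;
    }
    // offsetUsable(24, 8) -> true  (EVLDD can encode it)
    // offsetUsable(20, 8) -> false (fall back to an X-Form / index reg)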
// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
static unsigned getOffsetONFromFION(const MachineInstr &MI,
unsigned FIOperandNum) {
@@ -963,7 +1076,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handled in a target-independent way");
- if (!noImmForm && ((isInt<16>(Offset) &&
+ bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+ isUInt<8>(Offset) :
+ isInt<16>(Offset);
+ if (!noImmForm && ((OffsetFitsMnemonic &&
((Offset % offsetMinAlign(MI)) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
OpC == TargetOpcode::PATCHPOINT)) {
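[Annotation] EVLDD/EVSTDD do not take the generic signed 16-bit D-Form displacement; their offset field is small and unsigned, hence the switch to isUInt<8>. Together with the new 8-byte alignment entry above, only small non-negative multiples of 8 stay in immediate form. A sketch using the real llvm::isInt/isUInt helpers:

    #include "llvm/Support/MathExtras.h"

    // Offset-fit rule from the hunk above: SPE doubleword memory ops get
    // an unsigned 8-bit check, everything else the usual signed 16-bit.
    static bool offsetFitsMnemonic(int64_t Offset, bool IsSPEDoubleword) {
      return IsSPEDoubleword ? llvm::isUInt<8>(Offset)
                             : llvm::isInt<16>(Offset);
    }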
@@ -1001,7 +1117,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (noImmForm)
OperandBase = 1;
- else if (OpC != TargetOpcode::INLINEASM) {
+ else if (OpC != TargetOpcode::INLINEASM &&
+ OpC != TargetOpcode::INLINEASM_BR) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -1016,7 +1133,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
-unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const PPCFrameLowering *TFI = getFrameLowering(MF);
if (!TM.isPPC64())
@@ -1025,7 +1142,7 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
-unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
@@ -1080,7 +1197,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
const PPCFrameLowering *TFI = getFrameLowering(MF);
- unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
+ unsigned StackEst = TFI->determineFrameLayout(MF, true);
// If we likely don't need a stack frame, then we probably don't need a
// virtual base register either.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index e93fe4ce3453..a50e05920cd4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,13 +14,14 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
namespace llvm {
+class PPCTargetMachine;
inline static unsigned getCRFromCRBit(unsigned SrcReg) {
unsigned Reg = 0;
@@ -90,9 +90,7 @@ public:
return true;
}
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
- return true;
- }
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
@@ -134,10 +132,10 @@ public:
int64_t Offset) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
// Base pointer (stack realignment) support.
- unsigned getBaseRegister(const MachineFunction &MF) const;
+ Register getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
/// stripRegisterPrefix - This method strips the character prefix from a
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index d0d29b6d2c7d..af0dff6347a6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -375,8 +374,6 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
-def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
-
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index c8fe7d7eea78..4fa29d96ca14 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -1,9 +1,8 @@
//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -106,6 +105,7 @@ def IIC_VecVSL : InstrItinClass;
def IIC_VecVSR : InstrItinClass;
def IIC_SprMTMSRD : InstrItinClass;
def IIC_SprSLIE : InstrItinClass;
+def IIC_SprSLBFEE : InstrItinClass;
def IIC_SprSLBIE : InstrItinClass;
def IIC_SprSLBIEG : InstrItinClass;
def IIC_SprSLBMTE : InstrItinClass;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 646822eedbe0..708261fc7cc8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -1,9 +1,8 @@
//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index f34c1accc0fd..c2b298524e00 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -1,9 +1,8 @@
//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
index 479a970b2537..74744dda54f7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index d8bda073833f..1a1c041565b6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 3e50803955c4..4480d7fba4fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 0995b7200d93..8f1907f2c016 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 1b15c7b3c7ad..0eabc49d7841 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 0044c3c6a449..9c84aec638d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index c802b80170fb..087073537796 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 1d6e509819da..5a8c1eb2b837 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index ff39dfda7016..70a58f42a98a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index a1e625c855e0..6a79cca89194 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,8 +50,21 @@ let SchedModel = P9Model in {
// ***************** Processor Resources *****************
- //Dispatcher:
- def DISPATCHER : ProcResource<12>;
+ // Dispatcher slots:
+ // x0, x1, x2, and x3 are the dedicated slice dispatch ports, where each
+ // corresponds to one of the four execution slices.
+ def DISPx02 : ProcResource<2>;
+ def DISPx13 : ProcResource<2>;
+ // The xa and xb ports can be used to send an iop to either of the two slices
+ // of the superslice, but are restricted to iops with only two primary sources.
+ def DISPxab : ProcResource<2>;
+ // b0 and b1 are dedicated dispatch ports into the branch slice.
+ def DISPb01 : ProcResource<2>;
+
+  // Any of the non-branch dispatch ports.
+ def DISP_NBR
+ : ProcResGroup<[ DISPx02, DISPx13, DISPxab]>;
+ def DISP_SS : ProcResGroup<[ DISPx02, DISPx13]>;
// Issue Ports
// An instruction can go down one of two issue queues.
@@ -117,8 +129,37 @@ let SchedModel = P9Model in {
// ***************** SchedWriteRes Definitions *****************
- //Dispatcher
- def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+ // Dispatcher
+ // Dispatch Rules: '-' or 'V'
+ // Vector ('V') - vector iops (128-bit operand) take only one decode and
+ // dispatch slot but are dispatched to both the even and odd slices of a
+ // superslice.
+ def DISP_1C : SchedWriteRes<[DISP_NBR]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+ // Dispatch Rules: 'E'
+ // Even slice ('E')- certain operations must be sent only to an even slice.
+  // Also consumes the odd dispatch slot of the same superslice at dispatch.
+ def DISP_EVEN_1C : SchedWriteRes<[ DISPx02, DISPx13 ]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+ // Dispatch Rules: 'P'
+ // Paired ('P') - certain cracked and expanded iops are paired such that they
+ // must dispatch together to the same superslice.
+ def DISP_PAIR_1C : SchedWriteRes<[ DISP_SS, DISP_SS]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+  // Tuple Restricted ('R') - certain iops preclude dispatching more than one
+  // operation per slice for the superslice to which they are dispatched.
+ def DISP_3SLOTS_1C : SchedWriteRes<[DISPx02, DISPx13, DISPxab]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+ // Each execution and branch slice can receive up to two iops per cycle
+ def DISP_BR_1C : SchedWriteRes<[ DISPxab ]> {
let NumMicroOps = 0;
let Latency = 1;
}
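[Annotation] Taken together, the model exposes six non-branch dispatch slots per superslice pair (two x02, two x13, two ab) plus two branch slots, and each SchedWriteRes describes how many of them one iop consumes in its dispatch cycle. A toy accounting of the tuple-restricted case, with slot counts assumed from the ProcResource<2> declarations above:

    #include <array>

    // One dispatch cycle of a superslice: free slots in DISPx02,
    // DISPx13 and DISPxab. A tuple-restricted ('R') iop, modelled by
    // DISP_3SLOTS_1C above, takes one slot from each group at once.
    struct DispatchCycle {
      std::array<int, 3> Free{2, 2, 2};
      bool takeRestricted() {
        for (int F : Free)
          if (F == 0)
            return false;      // some group is already exhausted
        for (int &F : Free)
          --F;
        return true;
      }
    };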
@@ -148,7 +189,7 @@ let SchedModel = P9Model in {
// ALU Units
// An ALU may take either 2 or 3 cycles to complete the operation.
- // However, the ALU unit is only every busy for 1 cycle at a time and may
+ // However, the ALU unit is only ever busy for 1 cycle at a time and may
// receive new instructions each cycle.
def P9_ALU_2C : SchedWriteRes<[ALU]> {
let Latency = 2;
@@ -203,10 +244,6 @@ let SchedModel = P9Model in {
// DP Unit
// A DP unit may take from 2 to 36 cycles to complete.
// Some DP operations keep the unit busy for up to 10 cycles.
- def P9_DP_2C : SchedWriteRes<[DP]> {
- let Latency = 2;
- }
-
def P9_DP_5C : SchedWriteRes<[DP]> {
let Latency = 5;
}
@@ -228,11 +265,6 @@ let SchedModel = P9Model in {
let Latency = 22;
}
- def P9_DP_24C_8 : SchedWriteRes<[DP]> {
- let ResourceCycles = [8];
- let Latency = 24;
- }
-
def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
let ResourceCycles = [8];
let Latency = 24;
@@ -248,11 +280,6 @@ let SchedModel = P9Model in {
let Latency = 22;
}
- def P9_DP_27C_7 : SchedWriteRes<[DP]> {
- let ResourceCycles = [7];
- let Latency = 27;
- }
-
def P9_DPE_27C_10 : SchedWriteRes<[DP]> {
let ResourceCycles = [10];
let Latency = 27;
@@ -383,16 +410,12 @@ let SchedModel = P9Model in {
def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>;
- def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>;
def P9_ALUOpAndALUOpAndALUOp_6C :
WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>;
def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>;
- def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>;
def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>;
- def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
- def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>;
def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 57244ddff552..6239decf1539 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,6 +39,11 @@ static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
cl::Hidden);
+static cl::opt<bool>
+ EnableMachinePipeliner("ppc-enable-pipeliner",
+ cl::desc("Enable Machine Pipeliner for PPC"),
+ cl::init(false), cl::Hidden);
+
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
@@ -68,6 +72,7 @@ void PPCSubtarget::initializeEnvironment() {
HasFPU = false;
HasQPX = false;
HasVSX = false;
+ NeedsTwoConstNR = false;
HasP8Vector = false;
HasP8Altivec = false;
HasP8Crypto = false;
@@ -103,11 +108,13 @@ void PPCSubtarget::initializeEnvironment() {
HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
- HasFusion = false;
HasFloat128 = false;
IsISA3_0 = false;
UseLongCalls = false;
SecurePlt = false;
+ VectorsUseTwoUnits = false;
+ UsePPCPreRASchedStrategy = false;
+ UsePPCPostRASchedStrategy = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
@@ -138,8 +145,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isDarwin())
HasLazyResolverStubs = true;
- if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13)
- || TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD())
+ if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) ||
+ TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
+ TargetTriple.isMusl())
SecurePlt = true;
if (HasSPE && IsPPC64)
@@ -179,10 +187,14 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
return false;
}
-bool PPCSubtarget::enableMachineScheduler() const {
- return true;
+bool PPCSubtarget::enableMachineScheduler() const { return true; }
+
+bool PPCSubtarget::enableMachinePipeliner() const {
+ return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
}
+bool PPCSubtarget::useDFAforSMS() const { return false; }
+
// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool PPCSubtarget::enablePostRAScheduler() const { return true; }
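[Annotation] With both pieces in place, software pipelining is opt-in and POWER9-only: PPCTargetMachine::addPreRegAlloc (see the hunk further down) schedules the generic MachinePipeliner pass at -O1 and above, but the pass presumably bails out unless the subtarget reports enableMachinePipeliner(), i.e. unless the CPU is pwr9 and the hidden flag is given, e.g. "llc -mcpu=pwr9 -ppc-enable-pipeliner ..." (invocation illustrative).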
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index c56f254d6bec..55fec1cb6d99 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -1,9 +1,8 @@
//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,6 +98,7 @@ protected:
bool HasSPE;
bool HasQPX;
bool HasVSX;
+ bool NeedsTwoConstNR;
bool HasP8Vector;
bool HasP8Altivec;
bool HasP8Crypto;
@@ -131,11 +131,13 @@ protected:
bool HasPartwordAtomics;
bool HasDirectMove;
bool HasHTM;
- bool HasFusion;
bool HasFloat128;
bool IsISA3_0;
bool UseLongCalls;
bool SecurePlt;
+ bool VectorsUseTwoUnits;
+ bool UsePPCPreRASchedStrategy;
+ bool UsePPCPostRASchedStrategy;
POPCNTDKind HasPOPCNTD;
@@ -244,6 +246,7 @@ public:
bool hasFPU() const { return HasFPU; }
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
+ bool needsTwoConstNR() const { return NeedsTwoConstNR; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
bool hasP8Crypto() const { return HasP8Crypto; }
@@ -260,6 +263,7 @@ public:
bool isPPC4xx() const { return IsPPC4xx; }
bool isPPC6xx() const { return IsPPC6xx; }
bool isSecurePlt() const {return SecurePlt; }
+ bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -267,6 +271,8 @@ public:
bool hasInvariantFunctionDescriptors() const {
return HasInvariantFunctionDescriptors;
}
+ bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
+ bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool hasDirectMove() const { return HasDirectMove; }
@@ -285,7 +291,6 @@ public:
}
bool hasHTM() const { return HasHTM; }
- bool hasFusion() const { return HasFusion; }
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
bool useLongCalls() const { return UseLongCalls; }
@@ -307,16 +312,21 @@ public:
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
- bool isSVR4ABI() const { return !isDarwinABI(); }
+ bool isAIXABI() const { return TargetTriple.isOSAIX(); }
+ bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
bool isELFv2ABI() const;
/// Originally, this function return hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.
bool enableEarlyIfConversion() const override { return true; }
- // Scheduling customization.
+ /// Scheduling customization.
bool enableMachineScheduler() const override;
- // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ /// Pipeliner customization.
+ bool enableMachinePipeliner() const override;
+  /// Machine Pipeliner customization.
+ bool useDFAforSMS() const override;
+ /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool enablePostRAScheduler() const override;
AntiDepBreakMode getAntiDepBreakMode() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ac36abbe8439..fb826c4a32f1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -1,9 +1,8 @@
//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,10 +34,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-tls-dynamic-call"
-namespace llvm {
- void initializePPCTLSDynamicCallPass(PassRegistry&);
-}
-
namespace {
struct PPCTLSDynamicCall : public MachineFunctionPass {
static char ID;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 17345b6ca8d3..3eb0569fb955 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -1,9 +1,8 @@
//===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,10 +82,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-toc-reg-deps"
-namespace llvm {
- void initializePPCTOCRegDepsPass(PassRegistry&);
-}
-
namespace {
// PPCTOCRegDeps pass - For simple functions without epilogue code, move
// returns up, and create conditional returns, to avoid unnecessary
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 580d057602f5..ce00f848dd72 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,9 +13,11 @@
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
+#include "PPCMachineScheduler.h"
#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
#include "PPCTargetTransformInfo.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -100,6 +101,19 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+#ifndef NDEBUG
+ initializePPCCTRLoopsVerifyPass(PR);
+#endif
+ initializePPCLoopPreIncPrepPass(PR);
+ initializePPCTOCRegDepsPass(PR);
+ initializePPCEarlyReturnPass(PR);
+ initializePPCVSXCopyPass(PR);
+ initializePPCVSXFMAMutatePass(PR);
+ initializePPCVSXSwapRemovalPass(PR);
+ initializePPCReduceCRLogicalsPass(PR);
+ initializePPCBSelPass(PR);
+ initializePPCBranchCoalescingPass(PR);
+ initializePPCQPXLoadSplatPass(PR);
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
@@ -199,6 +213,8 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
case Triple::ppc64le:
return PPCTargetMachine::PPC_ABI_ELFv2;
case Triple::ppc64:
+ if (TT.getEnvironment() == llvm::Triple::ELFv2)
+ return PPCTargetMachine::PPC_ABI_ELFv2;
return PPCTargetMachine::PPC_ABI_ELFv1;
default:
return PPCTargetMachine::PPC_ABI_UNKNOWN;
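[Annotation] Previously every big-endian ppc64 triple was pinned to ELFv1. With this hunk, a triple whose environment component parses as llvm::Triple::ELFv2 (spelling of the suffix, e.g. "-elfv2", assumed from llvm::Triple's parser) selects the ELFv2 ABI without a separate target-abi override.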
@@ -227,9 +243,9 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
bool JIT) {
if (CM) {
if (*CM == CodeModel::Tiny)
- report_fatal_error("Target does not support the tiny CodeModel");
+ report_fatal_error("Target does not support the tiny CodeModel", false);
if (*CM == CodeModel::Kernel)
- report_fatal_error("Target does not support the kernel CodeModel");
+ report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
if (!TT.isOSDarwin() && !JIT &&
@@ -238,6 +254,29 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
return CodeModel::Small;
}
+
+static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
+ const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
+ llvm::make_unique<PPCPreRASchedStrategy>(C) :
+ llvm::make_unique<GenericScheduler>(C));
+ // add DAG Mutations here.
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+}
+
+static ScheduleDAGInstrs *createPPCPostMachineScheduler(
+ MachineSchedContext *C) {
+ const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+ ScheduleDAGMI *DAG =
+ new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
+ llvm::make_unique<PPCPostRASchedStrategy>(C) :
+ llvm::make_unique<PostGenericScheduler>(C), true);
+ // add DAG Mutations here.
+ return DAG;
+}
+
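[Annotation] Both factories follow the same shape: pick the PPC-specific strategy when the subtarget opts in, otherwise fall back to the generic one, and (pre-RA only) add the copy-constrain DAG mutation. The MachineSchedRegistry entries at the end of the file additionally expose them by name, so either scheduler can presumably be forced via the usual registry flag, e.g. "llc -misched=ppc-prera ..." (invocation illustrative).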
// The FeatureString here is a little subtle. We are modifying the feature
// string with what are (currently) non-function specific overrides as it goes
// into the LLVMTargetMachine constructor and then using the stored value in the
@@ -331,6 +370,14 @@ public:
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ return createPPCMachineScheduler(C);
+ }
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ return createPPCPostMachineScheduler(C);
+ }
};
} // end anonymous namespace
@@ -374,7 +421,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCCTRLoops());
+ addPass(createHardwareLoopsPass());
return false;
}
@@ -441,6 +488,9 @@ void PPCPassConfig::addPreRegAlloc() {
}
if (EnableExtraTOCRegDeps)
addPass(createPPCTOCRegDepsPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&MachinePipelinerID);
}
void PPCPassConfig::addPreSched2() {
@@ -469,3 +519,13 @@ TargetTransformInfo
PPCTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(PPCTTIImpl(this, F));
}
+
+static MachineSchedRegistry
+PPCPreRASchedRegistry("ppc-prera",
+ "Run PowerPC PreRA specific scheduler",
+ createPPCMachineScheduler);
+
+static MachineSchedRegistry
+PPCPostRASchedRegistry("ppc-postra",
+ "Run PowerPC PostRA specific scheduler",
+ createPPCPostMachineScheduler);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 75b98a815ab4..fd1d14ae32d4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -1,9 +1,8 @@
//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,10 +58,6 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index a049dc3fda93..e237fab1b267 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
index 417b8ed0d612..78a5840c87c7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index 310fea9ef09f..e17361d997fd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,9 +1,8 @@
//===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index bc9bcab83a0a..ff3dfbfaca05 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,17 +1,18 @@
//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "PPCTargetTransformInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -32,6 +33,13 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
cl::desc("Enable using coldcc calling conv for cold "
"internal functions"));
+// The latency of mtctr is only justified if there are more than 4
+// comparisons that will be removed as a result.
+static cl::opt<unsigned>
+SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
+ cl::desc("Loops with a constant trip count smaller than "
+ "this value will not use the count register."));
+
//===----------------------------------------------------------------------===//
//
// PPC cost model.
@@ -205,6 +213,341 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
return BaseT::getUserCost(U, Operands);
}
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
+ TargetLibraryInfo *LibInfo) {
+ const PPCTargetMachine &TM = ST->getTargetMachine();
+
+ // Loop through the inline asm constraints and look for something that
+ // clobbers ctr.
+ auto asmClobbersCTR = [](InlineAsm *IA) {
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+ return false;
+ };
+
+ // Determining the address of a TLS variable results in a function call in
+ // certain TLS models.
+ std::function<bool(const Value*)> memAddrUsesCTR =
+ [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV) {
+ // Recurse to check for constants that refer to TLS global variables.
+ if (const auto *CV = dyn_cast<Constant>(MemAddr))
+ for (const auto &CO : CV->operands())
+ if (memAddrUsesCTR(CO))
+ return true;
+
+ return false;
+ }
+
+ if (!GV->isThreadLocal())
+ return false;
+ TLSModel::Model Model = TM.getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic ||
+ Model == TLSModel::LocalDynamic;
+ };
+
+ auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+ return false;
+ };
+
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ if (asmClobbersCTR(IA))
+ return true;
+ continue;
+ }
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode = 0;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+          // If we have a call to set_loop_iterations or loop_decrement,
+          // we're definitely using CTR.
+ case Intrinsic::set_loop_iterations:
+ case Intrinsic::loop_decrement:
+ return true;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+    // Restore setjmp to its _setjmp-defined state.
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, the
+ // control can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
+ }
+ }
+
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc_copysignl:
+ return true;
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ Opcode = ISD::FROUND; break;
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ Opcode = ISD::FTRUNC; break;
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ Opcode = ISD::FMINNUM; break;
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ Opcode = ISD::FMAXNUM; break;
+ }
+ }
+
+ if (Opcode) {
+ EVT EVTy =
+ TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
+
+ if (EVTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
+ continue;
+ else if (EVTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
+ continue;
+
+ return true;
+ }
+ }
+
+ return true;
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+ return true;
+ } else if (isLargeIntegerTy(!TM.isPPC64(),
+ J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (!TM.isPPC64() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
+ return true;
+ }
+
+ // FREM is always a call.
+ if (J->getOpcode() == Instruction::FRem)
+ return true;
+
+ if (ST->useSoftFloat()) {
+ switch(J->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FCmp:
+ return true;
+ }
+ }
+
+ for (Value *Operand : J->operands())
+ if (memAddrUsesCTR(Operand))
+ return true;
+ }
+
+ return false;
+}
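
[editor's note: a minimal sketch of the rule the switch above encodes, under the stated assumptions; the enum and function names are illustrative, not LLVM API]

#include <initializer_list>
#include <string>

// Three-way classification mirroring the switch above: fabs*/copysign/
// copysignf lower directly to ISD nodes; copysignl operates on ppc_fp128
// and remains a real call; the sqrt/rounding/min/max family stays inline
// only when the matching ISD opcode is legal or custom for the type.
enum class Lowering { NeverCall, NodeIfLegal, AlwaysCall };

Lowering classifyFPLibCall(const std::string &Name) {
  if (Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
      Name == "copysign" || Name == "copysignf")
    return Lowering::NeverCall;
  if (Name == "copysignl")
    return Lowering::AlwaysCall;
  for (const char *Base : {"sqrt", "floor", "nearbyint", "ceil", "rint",
                           "round", "trunc", "fmin", "fmax"})
    if (Name == Base || Name == std::string(Base) + "f" ||
        Name == std::string(Base) + "l")
      return Lowering::NodeIfLegal;
  return Lowering::AlwaysCall;
}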
+
+bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ const PPCTargetMachine &TM = ST->getTargetMachine();
+ TargetSchedModel SchedModel;
+ SchedModel.init(ST);
+
+ // Do not convert short loops (small constant trip count, small body) to
+ // CTR loops.
+ unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
+ if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+ CodeMetrics Metrics;
+ for (BasicBlock *BB : L->blocks())
+ Metrics.analyzeBasicBlock(BB, *this, EphValues);
+ // 6 is an approximate latency for the mtctr instruction.
+ if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
+ return false;
+ }
+
+ // We don't want to spill/restore the counter register, and so we don't
+ // want to use the counter register if the loop contains calls.
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I)
+ if (mightUseCTR(*I, LibInfo))
+ return false;
+
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // If there is an exit edge known to be frequently taken,
+ // we should not transform this loop.
+ for (auto &BB : ExitingBlocks) {
+ Instruction *TI = BB->getTerminator();
+ if (!TI) continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ uint64_t TrueWeight = 0, FalseWeight = 0;
+ if (!BI->isConditional() ||
+ !BI->extractProfMetadata(TrueWeight, FalseWeight))
+ continue;
+
+ // If the exit path is more frequent than the loop path,
+ // we return here without further analysis for this loop.
+ bool TrueIsExit = !L->contains(BI->getSuccessor(0));
+ if (( TrueIsExit && FalseWeight < TrueWeight) ||
+ (!TrueIsExit && FalseWeight > TrueWeight))
+ return false;
+ }
+ }
+
+ LLVMContext &C = L->getHeader()->getContext();
+ HWLoopInfo.CountType = TM.isPPC64() ?
+ Type::getInt64Ty(C) : Type::getInt32Ty(C);
+ HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+ return true;
+}
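
[editor's note: a standalone sketch of the small-loop guard above; the function name and the threshold value in the example are illustrative, not LLVM API]

#include <cstdio>

// A loop with a small known trip count is only converted when its body is
// large enough to hide the ~6-cycle mtctr latency.
bool ctrLoopPassesSmallLoopGuard(unsigned ConstTripCount, unsigned Threshold,
                                 unsigned NumInsts, unsigned IssueWidth) {
  const unsigned ApproxMtctrLatency = 6; // per the comment above
  if (ConstTripCount && ConstTripCount < Threshold &&
      NumInsts <= ApproxMtctrLatency * IssueWidth)
    return false;
  return true;
}

int main() {
  // 8 iterations, 10-instruction body, 4-wide issue: stays a normal loop.
  printf("%d\n", ctrLoopPassesSmallLoopGuard(8, 16, 10, 4)); // 0
  // Same loop with a 40-instruction body: worth an mtctr.
  printf("%d\n", ctrLoopPassesSmallLoopGuard(8, 16, 40, 4)); // 1
}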
+
void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
@@ -239,17 +582,12 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
-const PPCTTIImpl::TTI::MemCmpExpansionOptions *
-PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
- static const auto Options = []() {
- TTI::MemCmpExpansionOptions Options;
- Options.LoadSizes.push_back(8);
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
- return Options;
- }();
- return &Options;
+PPCTTIImpl::TTI::MemCmpExpansionOptions
+PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ return Options;
}
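
[editor's note: a rough illustration of what the LoadSizes list above permits, assuming a greedy cover; the real expansion pass may also use overlapping loads, so treat this as a lower bound only]

#include <cstdio>

// Greedy cover of a memcmp size by the load sizes set in Options.LoadSizes.
unsigned loadsPerOperand(unsigned Size) {
  const unsigned LoadSizes[] = {8, 4, 2, 1};
  unsigned N = 0;
  for (unsigned S : LoadSizes)
    while (Size >= S) { Size -= S; ++N; }
  return N;
}

int main() {
  printf("%u\n", loadsPerOperand(15)); // 4: one 8-, 4-, 2- and 1-byte load
  printf("%u\n", loadsPerOperand(16)); // 2: two 8-byte loads
}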
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
@@ -324,6 +662,33 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
+// Adjust the cost of vector instructions on targets on which there is overlap
+// between the vector and scalar units, thereby reducing the overall throughput
+// of vector code relative to scalar code.
+int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
+ Type *Ty2) {
+ if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
+ return Cost;
+
+ std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+ // If type legalization involves splitting the vector, we don't want to
+ // double the cost at every step, only at the last one.
+ if (LT1.first != 1 || !LT1.second.isVector())
+ return Cost;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (TLI->isOperationExpand(ISD, LT1.second))
+ return Cost;
+
+ if (Ty2) {
+ std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+ if (LT2.first != 1 || !LT2.second.isVector())
+ return Cost;
+ }
+
+ return Cost * 2;
+}
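
[editor's note: a standalone sketch of the decision tree in vectorCostAdjustment above, with booleans standing in for the subtarget and legalization queries; e.g. on a subtarget where vectorsUseTwoUnits() holds, a legal single-register v2f64 operation with base cost 1 would be reported as 2]

int adjustVectorCost(int Cost, bool VectorsUseTwoUnits, bool IsVectorTy,
                     bool LegalizesToOneVectorReg, bool OpIsExpanded) {
  if (!VectorsUseTwoUnits || !IsVectorTy)
    return Cost;   // scalar code, or no unit contention: unchanged
  if (!LegalizesToOneVectorReg)
    return Cost;   // split vectors: only the final step is doubled
  if (OpIsExpanded)
    return Cost;   // expanded ops already carry their own high cost
  return Cost * 2; // a legal vector op contending with the scalar unit
}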
+
int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -331,8 +696,9 @@ int PPCTTIImpl::getArithmeticInstrCost(
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
+ return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
}
int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -345,19 +711,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
// structured types of shuffles covered by TTI::ShuffleKind).
- return LT.first;
+ return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
+ nullptr);
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return vectorCostAdjustment(Cost, Opcode, Dst, Src);
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -366,18 +735,23 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+ Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
- // Double-precision scalars are already located in index #0.
- if (Index == 0)
+ // Double-precision scalars are already located in index #0 (or #1 if LE).
+ if (ISD == ISD::EXTRACT_VECTOR_ELT &&
+ Index == (ST->isLittleEndian() ? 1 : 0))
return 0;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
+
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
// Floating point scalars are already located in index #0.
if (Index == 0)
return 0;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
}
// Estimated cost of a load-hit-store delay. This was obtained
@@ -394,9 +768,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// these need to be estimated as very costly.
if (ISD == ISD::EXTRACT_VECTOR_ELT ||
ISD == ISD::INSERT_VECTOR_ELT)
- return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return LHSPenalty + Cost;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -407,6 +781,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
"Invalid Opcode");
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -500,3 +875,25 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}
+bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT,
+ AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+ return false; // Stop search: an inner loop is already a candidate.
+
+ HardwareLoopInfo HWLoopInfo(L);
+
+ if (!HWLoopInfo.canAnalyze(*LI))
+ return false;
+
+ if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
+ return false;
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
+ return false;
+
+ *BI = HWLoopInfo.ExitBranch;
+ return true;
+}
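
[editor's note: a sketch of the search shape in canSaveCmp above — nested loops are visited first, and once an inner loop qualifies every enclosing loop reports false so only the innermost candidate's branch is recorded; ToyLoop and the function name are illustrative, not the LLVM Loop API]

#include <vector>

struct ToyLoop {
  std::vector<ToyLoop *> SubLoops;
  bool Qualifies = false; // stands in for the three hardware-loop checks
};

// Innermost-first search: an inner candidate makes every ancestor bail out.
bool findInnermostCandidate(ToyLoop *L, ToyLoop **Out) {
  for (ToyLoop *Sub : L->SubLoops)
    if (findInnermostCandidate(Sub, Out))
      return false; // an inner loop already claimed the hardware loop
  if (!L->Qualifies)
    return false;
  *Out = L;
  return true;
}

int main() {
  ToyLoop Inner, Outer;
  Inner.Qualifies = true;
  Outer.Qualifies = true;
  Outer.SubLoops.push_back(&Inner);
  ToyLoop *Found = nullptr;
  findInnermostCandidate(&Outer, &Found);
  return Found == &Inner ? 0 : 1; // the inner loop wins
}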
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 9221a910288a..5d76ee418b69 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -17,7 +16,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
-#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
@@ -35,6 +33,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCSubtarget *getST() const { return ST; }
const PPCTargetLowering *getTLI() const { return TLI; }
+ bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -54,6 +53,13 @@ public:
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo);
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
@@ -63,14 +69,15 @@ public:
/// @{
bool useColdCCForColdCall(Function &F);
bool enableAggressiveInterleaving(bool LoopHasReductions);
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
unsigned getMaxInterleaveFactor(unsigned VF);
+ int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 93fe3230ab81..719ed7b63878 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -1,9 +1,8 @@
//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-copy"
-namespace llvm {
- void initializePPCVSXCopyPass(PassRegistry&);
-}
-
namespace {
// PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
// (Altivec and scalar floating-point registers), we need to transform the
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 6586f503a7b8..ce78239df0a8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -1,9 +1,8 @@
//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1be193e08c01..44175af7f9b6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -1,9 +1,8 @@
//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -60,10 +59,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-swaps"
-namespace llvm {
- void initializePPCVSXSwapRemovalPass(PassRegistry&);
-}
-
namespace {
// A PPCVSXSwapEntry is created for each machine instruction that
@@ -427,6 +422,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// of opcodes having a common attribute in TableGen. Should this
// change, this is a prime candidate to use such a mechanism.
case PPC::INLINEASM:
+ case PPC::INLINEASM_BR:
case PPC::EXTRACT_SUBREG:
case PPC::INSERT_SUBREG:
case PPC::COPY_TO_REGCLASS:
diff --git a/contrib/llvm/lib/Target/PowerPC/README_P9.txt b/contrib/llvm/lib/Target/PowerPC/README_P9.txt
index d56f7cca7b21..c9984b7604bd 100644
--- a/contrib/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/contrib/llvm/lib/Target/PowerPC/README_P9.txt
@@ -512,8 +512,8 @@ Fixed Point Facility:
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
- . (set f64:$XT, (load ixaddr:$src))
- (set f64:$XT, (store ixaddr:$dst))
+ . (set f64:$XT, (load iaddrX4:$src))
+ (set f64:$XT, (store iaddrX4:$dst))
- Load/Store SP, with conversion from/to DP: lxssp stxssp
. Similar to lxsspx/stxsspx:
@@ -521,8 +521,8 @@ Fixed Point Facility:
"lxsspx $XT, $src", IIC_LdStLFD,
[(set f32:$XT, (load xoaddr:$src))]>;
- . (set f32:$XT, (load ixaddr:$src))
- (set f32:$XT, (store ixaddr:$dst))
+ . (set f32:$XT, (load iaddrX4:$src))
+ (set f32:$XT, (store iaddrX4:$dst))
- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
. Similar to lxsiwzx:
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 979595264472..99b5dec74668 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
new file mode 100644
index 000000000000..2d0afbfb1be0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- PowerPCTargetInfo.h - PowerPC Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+#define LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getThePPC32Target();
+Target &getThePPC64Target();
+Target &getThePPC64LETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H