Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
98 files changed, 4402 insertions, 2334 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 8b3480f772e9..c9524da93acd 100644 --- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1,15 +1,15 @@ //===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" +#include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -147,8 +147,7 @@ public: : MCTargetAsmParser(Options, STI, MII) { // Check for 64-bit vs. 32-bit pointer mode. const Triple &TheTriple = STI.getTargetTriple(); - IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || - TheTriple.getArch() == Triple::ppc64le); + IsPPC64 = TheTriple.isPPC64(); IsDarwin = TheTriple.isMacOSX(); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -1129,7 +1128,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, } } -static std::string PPCMnemonicSpellCheck(StringRef S, uint64_t FBS, +static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID = 0); bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -1148,7 +1147,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); case Match_MnemonicFail: { - uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); std::string Suggestion = PPCMnemonicSpellCheck( ((PPCOperand &)*Operands[0]).getToken(), FBS); return Error(IDLoc, "invalid instruction" + Suggestion, diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index cce239cac970..7a8af57961cb 100644 --- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -1,13 +1,13 @@ //===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
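The PPCAsmParser hunk above replaces an open-coded architecture check with the Triple::isPPC64() helper. A minimal sketch of the equivalence, assuming only the two 64-bit arch values visible in the removed lines (the helper name below is illustrative, not from the patch):

    #include "llvm/ADT/Triple.h"

    // Same predicate as the removed open-coded check; Triple::isPPC64()
    // covers exactly these two 64-bit PowerPC architecture values.
    static bool isPPC64Triple(const llvm::Triple &T) {
      return T.getArch() == llvm::Triple::ppc64 ||
             T.getArch() == llvm::Triple::ppc64le;
    }

The same simplification recurs in PPCAsmBackend.cpp and PPCMCCodeEmitter.cpp further down.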
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -86,12 +86,6 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, CRRegs); } -static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, CRRegs); -} - static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index a405dd70c307..8778e916f7e4 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -1,9 +1,8 @@ //===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -29,6 +28,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); + case FK_NONE: case FK_Data_1: case FK_Data_2: case FK_Data_4: @@ -52,6 +52,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); + case FK_NONE: + return 0; case FK_Data_1: return 1; case FK_Data_2: @@ -74,10 +76,12 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { namespace { class PPCAsmBackend : public MCAsmBackend { - const Target &TheTarget; +protected: + Triple TT; public: - PPCAsmBackend(const Target &T, support::endianness Endian) - : MCAsmBackend(Endian), TheTarget(T) {} + PPCAsmBackend(const Target &T, const Triple &TT) + : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big), + TT(TT) {} unsigned getNumFixupKinds() const override { return PPC::NumTargetFixupKinds; @@ -136,9 +140,11 @@ public: bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override { - switch ((PPC::Fixups)Fixup.getKind()) { + switch ((unsigned)Fixup.getKind()) { default: return false; + case FK_NONE: + return true; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: // If the target symbol has a local entry point we must not attempt @@ -187,59 +193,76 @@ public: return true; } - - unsigned getPointerSize() const { - StringRef Name = TheTarget.getName(); - if (Name == "ppc64" || Name == "ppc64le") return 8; - assert(Name == "ppc32" && "Unknown target name!"); - return 4; - } }; } // end anonymous namespace // FIXME: This should be in a separate file. 
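Taken together, the three PPCAsmBackend hunks above give FK_NONE a consistent meaning: the fixup value passes through adjustFixupValue unchanged, getFixupKindNumBytes reports zero bytes so nothing in the fragment is patched, and shouldForceRelocation answers true so the object writer still records a relocation. A minimal sketch of that flow, with a hypothetical helper name:

    #include "llvm/MC/MCFixup.h"

    // Sketch only: FK_NONE rewrites no bytes in the encoded instruction
    // stream, yet an R_PPC_NONE / R_PPC64_NONE relocation is still emitted
    // at the fixup's offset because shouldForceRelocation returns true.
    static bool isRelocOnlyFixup(unsigned Kind) {
      return Kind == llvm::FK_NONE;
    }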
namespace { - class DarwinPPCAsmBackend : public PPCAsmBackend { - public: - DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, support::big) { } - - std::unique_ptr<MCObjectTargetWriter> - createObjectTargetWriter() const override { - bool is64 = getPointerSize() == 8; - return createPPCMachObjectWriter( - /*Is64Bit=*/is64, - (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), - MachO::CPU_SUBTYPE_POWERPC_ALL); - } - }; - - class ELFPPCAsmBackend : public PPCAsmBackend { - uint8_t OSABI; - public: - ELFPPCAsmBackend(const Target &T, support::endianness Endian, - uint8_t OSABI) - : PPCAsmBackend(T, Endian), OSABI(OSABI) {} - - std::unique_ptr<MCObjectTargetWriter> - createObjectTargetWriter() const override { - bool is64 = getPointerSize() == 8; - return createPPCELFObjectWriter(is64, OSABI); - } - }; + +class DarwinPPCAsmBackend : public PPCAsmBackend { +public: + DarwinPPCAsmBackend(const Target &T, const Triple &TT) + : PPCAsmBackend(T, TT) {} + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + bool Is64 = TT.isPPC64(); + return createPPCMachObjectWriter( + /*Is64Bit=*/Is64, + (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), + MachO::CPU_SUBTYPE_POWERPC_ALL); + } +}; + +class ELFPPCAsmBackend : public PPCAsmBackend { +public: + ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {} + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + bool Is64 = TT.isPPC64(); + return createPPCELFObjectWriter(Is64, OSABI); + } + + Optional<MCFixupKind> getFixupKind(StringRef Name) const override; +}; + +class XCOFFPPCAsmBackend : public PPCAsmBackend { +public: + XCOFFPPCAsmBackend(const Target &T, const Triple &TT) + : PPCAsmBackend(T, TT) {} + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + return createPPCXCOFFObjectWriter(TT.isArch64Bit()); + } +}; } // end anonymous namespace +Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const { + if (TT.isPPC64()) { + if (Name == "R_PPC64_NONE") + return FK_NONE; + } else { + if (Name == "R_PPC_NONE") + return FK_NONE; + } + return MCAsmBackend::getFixupKind(Name); +} + MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { const Triple &TT = STI.getTargetTriple(); if (TT.isOSDarwin()) - return new DarwinPPCAsmBackend(T); + return new DarwinPPCAsmBackend(T, TT); + + if (TT.isOSBinFormatXCOFF()) + return new XCOFFPPCAsmBackend(T, TT); - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - bool IsLittleEndian = TT.getArch() == Triple::ppc64le; - return new ELFPPCAsmBackend( - T, IsLittleEndian ? support::little : support::big, OSABI); + return new ELFPPCAsmBackend(T, TT); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index a3caf9a7a5ee..042ddf48d5df 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -1,9 +1,8 @@ //===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -134,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); + case FK_NONE: + Type = ELF::R_PPC_NONE; + break; case PPC::fixup_ppc_br24abs: Type = ELF::R_PPC_ADDR24; break; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index dce443997ea5..845489788c86 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -1,9 +1,8 @@ //===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 6824168b890d..0e64ae55ab1c 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -1,9 +1,8 @@ //===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -11,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstPrinter.h" +#include "MCTargetDesc/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrInfo.h" @@ -445,13 +444,22 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must // come at the _end_ of the expression. 
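The printTLSCall hunk just below teaches the printer to unwrap a binary add, so a TLS call target may now be "symbol@PLT + constant" rather than a bare symbol reference, printing as something like __tls_get_addr(x@tlsgd)@PLT+32768. A sketch of building such an operand with the generic MC expression API; the 32768 offset and the function name are illustrative, not taken from the patch:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    using namespace llvm;

    // Builds "Sym@PLT + 32768", the MCBinaryExpr shape the updated
    // printTLSCall splits back into a symbol ref plus a constant.
    static const MCExpr *makeTLSCallTarget(MCSymbol *Sym, MCContext &Ctx) {
      const MCExpr *Ref =
          MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PLT, Ctx);
      const MCExpr *Off = MCConstantExpr::create(32768, Ctx); // illustrative
      return MCBinaryExpr::createAdd(Ref, Off, Ctx);
    }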
const MCOperand &Op = MI->getOperand(OpNo); - const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr()); - O << refExp.getSymbol().getName(); + const MCSymbolRefExpr *RefExp = nullptr; + const MCConstantExpr *ConstExp = nullptr; + if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Op.getExpr())) { + RefExp = cast<MCSymbolRefExpr>(BinExpr->getLHS()); + ConstExp = cast<MCConstantExpr>(BinExpr->getRHS()); + } else + RefExp = cast<MCSymbolRefExpr>(Op.getExpr()); + + O << RefExp->getSymbol().getName(); O << '('; printOperand(MI, OpNo+1, O); O << ')'; - if (refExp.getKind() != MCSymbolRefExpr::VK_None) - O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind()); + if (RefExp->getKind() != MCSymbolRefExpr::VK_None) + O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind()); + if (ConstExp != nullptr) + O << '+' << ConstExp->getValue(); } /// showRegistersWithPercentPrefix - Check if this register name should be diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 351ccefa2da2..725ae2a7081b 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -1,9 +1,8 @@ //===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -11,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H -#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H +#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H +#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H #include "llvm/ADT/Triple.h" #include "llvm/MC/MCInstPrinter.h" diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index fb7bf23509c7..5f0005ea1d7b 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -1,9 +1,8 @@ //===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -82,3 +81,9 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { UseIntegratedAssembler = true; } +void PPCXCOFFMCAsmInfo::anchor() {} + +PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { + assert(!IsLittleEndian && "Little-endian XCOFF not supported."); + CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 
8 : 4; +} diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index e252ac944d40..42cb62ad26a4 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -1,13 +1,12 @@ //===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// This file contains the declaration of the MCAsmInfoDarwin class. +// This file contains the declarations of the PowerPC MCAsmInfo classes. // //===----------------------------------------------------------------------===// @@ -16,6 +15,7 @@ #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/MC/MCAsmInfoXCOFF.h" namespace llvm { class Triple; @@ -34,6 +34,13 @@ public: explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &); }; +class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF { + virtual void anchor(); + +public: + explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &); +}; + } // namespace llvm #endif diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 8c15ade6f9c4..676efc500455 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -1,9 +1,8 @@ //===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -217,7 +216,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, Fixups.push_back(MCFixup::create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); const Triple &TT = STI.getTargetTriple(); - bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le; + bool isPPC64 = TT.isPPC64(); return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index a4bcff4b9450..1324faa12553 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -1,9 +1,8 @@ //===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -99,9 +98,10 @@ public: unsigned getInstSizeInBytes(const MCInst &MI) const; private: - uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; - void verifyInstructionPredicates(const MCInst &MI, - uint64_t AvailableFeatures) const; + FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; + void + verifyInstructionPredicates(const MCInst &MI, + const FeatureBitset &AvailableFeatures) const; }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 32e6a0bdd65f..d467f5c4a439 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -1,9 +1,8 @@ //===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 8bb4791d13dd..449e2c34f74d 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -1,9 +1,8 @@ //===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 78609ef3d4e0..90c3c8d20edb 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -1,9 +1,8 @@ //===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
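The PPCMCCodeEmitter.h hunk above, like the earlier PPCAsmParser change, moves the generated feature helpers from uint64_t to FeatureBitset, presumably because a plain 64-bit mask can only represent 64 subtarget features while FeatureBitset is a bitset wide enough for all of them. A sketch under that assumption, with made-up feature indices (the real ones come from PPCGenSubtargetInfo.inc):

    #include "llvm/MC/SubtargetFeature.h"
    using llvm::FeatureBitset;

    // Hypothetical indices; a feature at index 70 would silently fall off
    // the end of a uint64_t mask but is representable in a FeatureBitset.
    enum { HasAltivec = 3, HasVSX = 70 };

    static FeatureBitset computeAvailable(const FeatureBitset &FB) {
      FeatureBitset Avail;
      if (FB[HasAltivec]) Avail.set(HasAltivec);
      if (FB[HasVSX])     Avail.set(HasVSX);
      return Avail;
    }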
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -12,9 +11,10 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" -#include "InstPrinter/PPCInstPrinter.h" +#include "MCTargetDesc/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" +#include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -48,9 +48,9 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "PPCGenRegisterInfo.inc" -// Pin the vtable to this file. PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} +// Pin the vtable to this file. PPCTargetStreamer::~PPCTargetStreamer() = default; static MCInstrInfo *createPPCMCInstrInfo() { @@ -83,6 +83,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); + else if (TheTriple.isOSBinFormatXCOFF()) + MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple); else MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple); @@ -235,6 +237,27 @@ public: } }; +class PPCTargetXCOFFStreamer : public PPCTargetStreamer { +public: + PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + + void emitTCEntry(const MCSymbol &S) override { + report_fatal_error("TOC entries not supported yet."); + } + + void emitMachine(StringRef CPU) override { + llvm_unreachable("Machine pseudo-ops are invalid for XCOFF."); + } + + void emitAbiVersion(int AbiVersion) override { + llvm_unreachable("ABI-version pseudo-ops are invalid for XCOFF."); + } + + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { + llvm_unreachable("Local-entry pseudo-ops are invalid for XCOFF."); + } +}; + } // end anonymous namespace static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, @@ -249,6 +272,8 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); if (TT.isOSBinFormatELF()) return new PPCTargetELFStreamer(S); + if (TT.isOSBinFormatXCOFF()) + return new PPCTargetXCOFFStreamer(S); return new PPCTargetMachOStreamer(S); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index d6e450cba0d7..74b67bd2e928 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -1,9 +1,8 @@ //===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -37,10 +36,6 @@ class Triple; class StringRef; class raw_pwrite_stream; -Target &getThePPC32Target(); -Target &getThePPC64Target(); -Target &getThePPC64LETarget(); - MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); @@ -56,6 +51,9 @@ std::unique_ptr<MCObjectTargetWriter> createPPCELFObjectWriter(bool Is64Bit, std::unique_ptr<MCObjectTargetWriter> createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); +/// Construct a PPC XCOFF object writer. +std::unique_ptr<MCObjectTargetWriter> createPPCXCOFFObjectWriter(bool Is64Bit); + /// Returns true iff Val consists of one contiguous run of 1s with any number of /// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so /// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index ff6cf584da23..4cf7fd15fa75 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -1,9 +1,8 @@ //===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index c2987b641c04..284e52c298a2 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -1,9 +1,8 @@ //===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 481ba3f09cc7..d686a8ea2a22 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -1,9 +1,8 @@ //===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp new file mode 100644 index 000000000000..9c661286d455 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -0,0 +1,29 @@ +//===-- PPCXCOFFObjectWriter.cpp - PowerPC XCOFF Writer -------------------===// +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PPCMCTargetDesc.h" +#include "llvm/MC/MCXCOFFObjectWriter.h" + +using namespace llvm; + +namespace { +class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter { + +public: + PPCXCOFFObjectWriter(bool Is64Bit); +}; +} // end anonymous namespace + +PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit) + : MCXCOFFObjectTargetWriter(Is64Bit) {} + +std::unique_ptr<MCObjectTargetWriter> +llvm::createPPCXCOFFObjectWriter(bool Is64Bit) { + return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit); +} diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td index 17c37964c562..2a10322d3f49 100644 --- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1,22 +1,21 @@ -//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===// +//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// This file defines the resources required by P9 instructions. This is part -// P9 processor model used for instruction scheduling. This file should contain -// all of the instructions that may be used on Power 9. This is not just -// instructions that are new on Power 9 but also instructions that were +// This file defines the resources required by P9 instructions. This is part of +// the P9 processor model used for instruction scheduling. This file should +// contain all the instructions that may be used on Power 9. This is not +// just instructions that are new on Power 9 but also instructions that were // available on earlier architectures and are still used in Power 9. // // The makeup of the P9 CPU is modeled as follows: // - Each CPU is made up of two superslices. // - Each superslice is made up of two slices. Therefore, there are 4 slices -// for each CPU. +// for each CPU. // - Up to 6 instructions can be dispatched to each CPU. Three per superslice. // - Each CPU has: // - One CY (Crypto) unit P9_CY_* @@ -33,9 +32,8 @@ // Two cycle ALU vector operation that uses an entire superslice. // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines -// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice. 
-def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], +// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. +def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs (instregex "VADDU(B|H|W|D)M$"), (instregex "VAND(C)?$"), @@ -85,9 +83,9 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, )>; // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a -// slingle slice. However, since it is Restricted it requires all 3 dispatches +// single slice. However, since it is Restricted, it requires all 3 dispatches // (DISP) for that superslice. -def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs (instregex "TABORT(D|W)C(I)?$"), (instregex "MTFSB(0|1)$"), @@ -103,7 +101,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], )>; // Standard Dispatch ALU operation for 3 cycles. Only one slice used. -def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C], (instrs (instregex "XSMAX(C|J)?DP$"), (instregex "XSMIN(C|J)?DP$"), @@ -120,11 +118,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C], )>; // Standard Dispatch ALU operation for 2 cycles. Only one slice used. -def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instrs (instregex "S(L|R)D$"), (instregex "SRAD(I)?$"), - (instregex "EXTSWSLI$"), + (instregex "EXTSWSLI_32_64$"), (instregex "MFV(S)?RD$"), (instregex "MTVSRD$"), (instregex "MTVSRW(A|Z)$"), @@ -160,6 +158,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], XSNEGDP, XSCPSGNDP, MFVSRWZ, + EXTSWSLI, SRADI_32, RLDIC, RFEBB, @@ -171,9 +170,9 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], )>; // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a -// slingle slice. However, since it is Restricted it requires all 3 dispatches -// (DISP) for that superslice. -def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +// single slice. However, since it is Restricted, it requires all 3 dispatches +// (DISP) for that superslice. +def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs (instregex "RLDC(L|R)$"), (instregex "RLWIMI(8)?$"), @@ -200,9 +199,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], // Three cycle ALU vector operation that uses an entire superslice. // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines -// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice. -def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], +// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. +def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs (instregex "M(T|F)VSCR$"), (instregex "VCMPNEZ(B|H|W)$"), @@ -285,10 +283,9 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, )>; // 7 cycle DP vector operation that uses an entire superslice. -// Uses both DP units (the even DPE and odd DPO units), two pipelines -// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice. -def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], +// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE, +// EXECO) and all three dispatches (DISP) to the given superslice. 
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs VADDFP, VCTSXS, @@ -395,18 +392,17 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, VSUMSWS )>; - // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three -// dispatch units for the superslice. -def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +// dispatch units for the superslice. +def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs - (instregex "MADD(HD|HDU|LD)$"), + (instregex "MADD(HD|HDU|LD|LD8)$"), (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$") )>; // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three -// dispatch units for the superslice. -def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +// dispatch units for the superslice. +def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FRSP, (instregex "FRI(N|P|Z|M)(D|S)$"), @@ -448,26 +444,26 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], )>; // 7 cycle Restricted DP operation and one 3 cycle ALU operation. -// These operations can be done in parallel. -// The DP is restricted so we need a full 5 dispatches. +// These operations can be done in parallel. The DP is restricted so we need a +// full 4 dispatches. def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "FSEL(D|S)o$") )>; // 5 Cycle Restricted DP operation and one 2 cycle ALU operation. def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "MUL(H|L)(D|W)(U)?o$") )>; // 7 cycle Restricted DP operation and one 3 cycle ALU operation. -// These operations must be done sequentially. -// The DP is restricted so we need a full 5 dispatches. +// These operations must be done sequentially.The DP is restricted so we need a +// full 4 dispatches. def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "FRI(N|P|Z|M)(D|S)o$"), (instregex "FRE(S)?o$"), @@ -483,8 +479,8 @@ def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, FRSPo )>; -// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units. -def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], +// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units. +def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C], (instrs XSADDDP, XSADDSP, @@ -520,9 +516,9 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], )>; // Three Cycle PM operation. Only one PM unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], (instrs (instregex "LVS(L|R)$"), (instregex "VSPLTIS(W|H|B)$"), @@ -628,9 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], )>; // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. 
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs BCDSRo, XSADDQP, @@ -652,17 +648,17 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], )>; // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs BCDCTSQo )>; // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs XSMADDQP, XSMADDQPO, @@ -677,39 +673,39 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], )>; // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs BCDCFSQo )>; // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs XSDIVQP, XSDIVQPO )>; // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs XSSQRTQP, XSSQRTQPO )>; // 6 Cycle Load uses a single slice. -def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C], (instrs (instregex "LXVL(L)?") )>; // 5 Cycle Load uses a single slice. -def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C], (instrs (instregex "LVE(B|H|W)X$"), (instregex "LVX(L)?"), @@ -728,7 +724,7 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C], )>; // 4 Cycle Load uses a single slice. -def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C], (instrs (instregex "DCB(F|T|ST)(EP)?$"), (instregex "DCBZ(L)?(EP)?$"), @@ -757,8 +753,8 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C], )>; // 4 Cycle Restricted load uses a single slice but the dispatch for the whole -// superslice. -def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. +def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C], (instrs LFIWZX, LFDX, @@ -768,7 +764,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], // Cracked Load Instructions. 
// Load instructions that can be done in parallel. def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_PAIR_1C], (instrs SLBIA, SLBIE, @@ -782,17 +778,26 @@ def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU // operations can be run in parallel. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_PAIR_1C, DISP_PAIR_1C], + (instrs + (instregex "L(W|H)ZU(X)?(8)?$") +)>; + +// Cracked TEND Instruction. +// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations can be run in parallel. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C], (instrs - (instregex "L(W|H)ZU(X)?(8)?$"), TEND )>; + // Cracked Store Instruction // Consecutive Store and ALU instructions. The store is restricted and requires // three dispatches. def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "ST(B|H|W|D)CX$") )>; @@ -800,16 +805,16 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, // Cracked Load Instruction. // Two consecutive load operations for a total of 8 cycles. def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_1C, DISP_1C], (instrs LDMX )>; // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. +// operations cannot be done at the same time and so their latencies are added. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_1C, DISP_1C], (instrs (instregex "LHA(X)?(8)?$"), (instregex "CP_PASTE(8)?o$"), @@ -819,20 +824,19 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, // Cracked Restricted Load instruction. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. +// operations cannot be done at the same time and so their latencies are added. // Full 6 dispatches are required as this is both cracked and restricted. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs LFIWAX )>; // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. +// operations cannot be done at the same time and so their latencies are added. // Full 4 dispatches are required as this is a cracked instruction. -def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs LXSIWAX, LIWAX @@ -844,7 +848,7 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, // their latencies are added. // Full 6 dispatches are required as this is a restricted instruction. def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs LFSX, LFS @@ -852,10 +856,9 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, // Cracked Load instruction. 
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. +// operations cannot be done at the same time and so their latencies are added. // Full 4 dispatches are required as this is a cracked instruction. -def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs LXSSP, LXSSPX, @@ -866,7 +869,7 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, // Cracked 3-Way Load Instruction // Load with two ALU operations that depend on each other def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C], (instrs (instregex "LHAU(X)?(8)?$"), LWAUX @@ -874,12 +877,11 @@ def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, // Cracked Load that requires the PM resource. // Since the Load and the PM cannot be done at the same time the latencies are -// added. Requires 8 cycles. -// Since the PM requires the full superslice we need both EXECE, EXECO pipelines -// as well as 3 dispatches for the PM. The Load requires the remaining 2 -// dispatches. +// added. Requires 8 cycles. Since the PM requires the full superslice we need +// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load +// requires the remaining 1 dispatch. def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_1C, DISP_1C], (instrs LXVH8X, LXVDSX, @@ -887,8 +889,8 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, )>; // Single slice Restricted store operation. The restricted operation requires -// all three dispatches for the superslice. -def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], +// all three dispatches for the superslice. +def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C], (instrs (instregex "STF(S|D|IWX|SX|DX)$"), (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), @@ -905,10 +907,9 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], )>; // Vector Store Instruction -// Requires the whole superslice and therefore requires all three dispatches +// Requires the whole superslice and therefore requires one dispatch // as well as both the Even and Odd exec pipelines. -def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, - DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C], (instrs (instregex "STVE(B|H|W)X$"), (instregex "STVX(L)?$"), @@ -916,18 +917,18 @@ def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, )>; // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// superslice. That includes both exec pipelines (EXECO, EXECE) and two // dispatches. -def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], (instrs (instregex "MTCTR(8)?(loop)?$"), (instregex "MTLR(8)?$") )>; // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// superslice. That includes both exec pipelines (EXECO, EXECE) and two // dispatches. 
-def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], (instrs (instregex "M(T|F)VRSAVE(v)?$"), (instregex "M(T|F)PMR$"), @@ -938,10 +939,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], )>; // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, - DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and two +// dispatches. +def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVW, DIVWU, @@ -949,10 +949,9 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, )>; // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, - DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and two +// dispatches. +def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVWE, DIVD, @@ -964,29 +963,28 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, )>; // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, - DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVDE, DIVDEU )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation -// and one full superslice for the DIV operation since there is only one DIV -// per superslice. Latency of DIV plus ALU is 26. +// and one full superslice for the DIV operation since there is only one DIV per +// superslice. Latency of DIV plus ALU is 26. def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_EVEN_1C, DISP_1C], (instrs (instregex "DIVW(U)?(O)?o$") )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation -// and one full superslice for the DIV operation since there is only one DIV -// per superslice. Latency of DIV plus ALU is 26. +// and one full superslice for the DIV operation since there is only one DIV per +// superslice. Latency of DIV plus ALU is 26. def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_EVEN_1C, DISP_1C], (instrs DIVDo, DIVDUo, @@ -995,10 +993,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation -// and one full superslice for the DIV operation since there is only one DIV -// per superslice. Latency of DIV plus ALU is 42. +// and one full superslice for the DIV operation since there is only one DIV per +// superslice. Latency of DIV plus ALU is 42. 
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_EVEN_1C, DISP_1C], (instrs DIVDEo, DIVDEUo @@ -1008,11 +1006,11 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, // Cracked, restricted, ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 6 dispatches. -// ALU ops are 2 cycles each. +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 6 dispatches. ALU ops are +// 2 cycles each. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs MTCRF, MTCRF8 @@ -1020,11 +1018,11 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, // Cracked ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 4 dispatches. -// ALU ops are 2 cycles each. +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 2 dispatches. ALU ops are +// 2 cycles each. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_1C, DISP_1C], (instrs (instregex "ADDC(8)?o$"), (instregex "SUBFC(8)?o$") @@ -1036,7 +1034,7 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, // One of the ALU ops is restricted the other is not so we have a total of // 5 dispatches. def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "F(N)?ABS(D|S)o$"), (instregex "FCPSGN(D|S)o$"), @@ -1046,22 +1044,22 @@ def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, // Cracked ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 4 dispatches. +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 2 dispatches. // ALU ops are 3 cycles each. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_1C, DISP_1C], (instrs MCRFS )>; // Cracked Restricted ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 6 dispatches. +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 6 dispatches. // ALU ops are 3 cycles each. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs (instregex "MTFSF(b|o)?$"), (instregex "MTFSFI(o)?$") @@ -1071,7 +1069,7 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, // The two ops cannot be done in parallel. // One of the ALU ops is restricted and takes 3 dispatches. 
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "RLD(I)?C(R|L)o$"), (instregex "RLW(IMI|INM|NM)(8)?o$"), @@ -1086,7 +1084,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, // The two ops cannot be done in parallel. // Both of the ALU ops are restricted and take 3 dispatches. def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs (instregex "MFFS(L|CE|o)?$") )>; @@ -1095,143 +1093,141 @@ def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, // total of 6 cycles. All of the ALU operations are also restricted so each // takes 3 dispatches for a total of 9. def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, - DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs (instregex "MFCR(8)?$") )>; // Cracked instruction made of two ALU ops. // The two ops cannot be done in parallel. -def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs - (instregex "EXTSWSLIo$"), + (instregex "EXTSWSLI_32_64o$"), (instregex "SRAD(I)?o$"), + EXTSWSLIo, SLDo, SRDo, RLDICo )>; // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIV )>; // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs FDIVo )>; // 36 Cycle DP Instruction. // Instruction can be done on a single slice. -def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], (instrs XSSQRTDP )>; // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRT )>; // 36 Cycle DP Vector Instruction. def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], + DISP_1C], (instrs XVSQRTDP )>; // 27 Cycle DP Vector Instruction. def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], + DISP_1C], (instrs XVSQRTSP )>; // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs FSQRTo )>; // 26 Cycle DP Instruction. -def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], (instrs XSSQRTSP )>; // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRTS )>; // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs FSQRTSo )>; -// 33 Cycle DP Instruction. Takes one slice and 2 dispatches. 
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C], +// 33 Cycle DP Instruction. Takes one slice and 1 dispatch. +def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C], (instrs XSDIVDP )>; // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIVS )>; // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs FDIVSo )>; -// 22 Cycle DP Instruction. Takes one slice and 2 dispatches. -def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C], +// 22 Cycle DP Instruction. Takes one slice and 1 dispatch. +def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C], (instrs XSDIVSP )>; // 24 Cycle DP Vector Instruction. Takes one full superslice. -// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given -// superslice. +// Includes both EXECE, EXECO pipelines and 1 dispatch for the given +// superslice. def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], + DISP_1C], (instrs XVDIVSP )>; // 33 Cycle DP Vector Instruction. Takes one full superslice. -// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given -// superslice. +// Includes both EXECE, EXECO pipelines and 1 dispatch for the given +// superslice. def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C, DISP_1C], + DISP_1C], (instrs XVDIVDP )>; // Instruction cracked into three pieces. One Load and two ALU operations. // The Load and one of the ALU ops cannot be run at the same time and so the -// latencies are added together for 6 cycles. The remainaing ALU is 2 cycles. +// latencies are added together for 6 cycles. The remaining ALU is 2 cycles. // Both the load and the ALU that depends on it are restricted and so they take -// a total of 6 dispatches. The final 2 dispatches come from the second ALU op. +// a total of 7 dispatches. The final dispatch comes from the second ALU op. // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load. def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "LF(SU|SUX)$") )>; @@ -1240,7 +1236,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, // the store and so it can be run at the same time as the store. The store is // also restricted. def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "STF(S|D)U(X)?$"), (instregex "ST(B|H|W|D)U(X)?(8)?$") @@ -1249,20 +1245,19 @@ def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, // Cracked instruction made up of a Load and an ALU. The ALU does not depend on // the load and so it can be run at the same time as the load. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_PAIR_1C, DISP_PAIR_1C], (instrs (instregex "LBZU(X)?(8)?$"), (instregex "LDU(X)?$") )>; - // Cracked instruction made up of a Load and an ALU. The ALU does not depend on -// the load and so it can be run at the same time as the load. The load is also -// restricted.
3 dispatches are from the restricted load while the other two -// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline -// is required for the ALU. +// the load and so it can be run at the same time as the load. The load is also +// restricted. 3 dispatches are from the restricted load while the other one +// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline +// is required for the ALU. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "LF(DU|DUX)$") )>; @@ -1270,9 +1265,9 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, // Crypto Instructions // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], (instrs (instregex "VPMSUM(B|H|W|D)$"), (instregex "V(N)?CIPHER(LAST)?$"), @@ -1282,14 +1277,14 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], // Branch Instructions // Two Cycle Branch -def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C], +def : InstRW<[P9_BR_2C, DISP_BR_1C], (instrs (instregex "BCCCTR(L)?(8)?$"), (instregex "BCCL(A|R|RL)?$"), (instregex "BCCTR(L)?(8)?(n)?$"), (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"), (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"), - (instregex "BL(_TLS)?$"), + (instregex "BL(_TLS|_NOP)?$"), (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"), (instregex "BLA(8|8_NOP)?$"), (instregex "BLR(8|L)?$"), @@ -1313,8 +1308,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C], // Five Cycle Branch with a 2 Cycle ALU Op // Operations must be done consecutively and not in parallel. -def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C], +def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C], (instrs ADDPCIS )>; @@ -1324,17 +1318,15 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, // Atomic Load def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, - IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, - DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, - DISP_1C], + IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, + DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "L(D|W)AT$") )>; // Atomic Store def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, - IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, - DISP_1C], + IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C], (instrs (instregex "ST(D|W)AT$") )>; @@ -1406,6 +1398,7 @@ def : InstRW<[], MBAR, MSYNC, SLBSYNC, + SLBFEEo, NAP, STOP, TRAP, diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index bfc613af3dc0..c6951ab67b08 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -1,9 +1,8 @@ //===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,7 +15,6 @@ #define LLVM_LIB_TARGET_POWERPC_PPC_H #include "llvm/Support/CodeGen.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC @@ -57,12 +55,26 @@ namespace llvm { MCOperand &OutMO, AsmPrinter &AP, bool isDarwin); + void initializePPCCTRLoopsPass(PassRegistry&); +#ifndef NDEBUG + void initializePPCCTRLoopsVerifyPass(PassRegistry&); +#endif + void initializePPCLoopPreIncPrepPass(PassRegistry&); + void initializePPCTOCRegDepsPass(PassRegistry&); + void initializePPCEarlyReturnPass(PassRegistry&); + void initializePPCVSXCopyPass(PassRegistry&); void initializePPCVSXFMAMutatePass(PassRegistry&); + void initializePPCVSXSwapRemovalPass(PassRegistry&); + void initializePPCReduceCRLogicalsPass(PassRegistry&); + void initializePPCBSelPass(PassRegistry&); + void initializePPCBranchCoalescingPass(PassRegistry&); + void initializePPCQPXLoadSplatPass(PassRegistry&); void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); void initializePPCPreEmitPeepholePass(PassRegistry &); void initializePPCTLSDynamicCallPass(PassRegistry &); void initializePPCMIPeepholePass(PassRegistry&); + extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index 98e6e98e6974..8e94a2ae15e0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -1,9 +1,8 @@ //===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,6 +135,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", "Enable VSX instructions", [FeatureAltivec]>; +def FeatureTwoConstNR : + SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true", + "Requires two constant Newton-Raphson computation">; def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", "Enable POWER8 Altivec instructions", [FeatureAltivec]>; @@ -162,8 +164,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", "Enable Hardware Transactional Memory instructions">; def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", "Implement mftb using the mfspr instruction">; -def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true", - "Target supports add/load integer fusion.">; +def FeaturePPCPreRASched: + SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true", + "Use PowerPC pre-RA scheduling strategy">; +def FeaturePPCPostRASched: + SubtargetFeature<"ppc-postra-sched", "UsePPCPostRASchedStrategy", "true", + "Use PowerPC post-RA scheduling strategy">; def FeatureFloat128 : SubtargetFeature<"float128", "HasFloat128", "true", "Enable the __float128 data type for IEEE-754R Binary128.", @@ -191,6 +197,13 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true", "Enable POWER9 vector instructions", [FeatureISA3_0, FeatureP8Vector, FeatureP9Altivec]>; +// A separate feature for this even though it is equivalent to P9Vector +// because this is a feature of the implementation rather than the architecture +// and may go away with future CPU's. +def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units", + "VectorsUseTwoUnits", + "true", + "Vectors use two units">; // Since new processors generally contain a superset of features of those that // came before them, the idea is to make implementations of new processors @@ -215,15 +228,15 @@ def ProcessorFeatures { FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureBPERMD, FeatureExtDiv, - FeatureMFTB, DeprecatedDST]; + FeatureMFTB, DeprecatedDST, FeatureTwoConstNR]; list<SubtargetFeature> Power8SpecificFeatures = [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, - FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, - FeatureFusion]; + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; list<SubtargetFeature> Power8FeatureList = !listconcat(Power7FeatureList, Power8SpecificFeatures); list<SubtargetFeature> Power9SpecificFeatures = - [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0]; + [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0, + FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched]; list<SubtargetFeature> Power9FeatureList = !listconcat(Power8FeatureList, Power9SpecificFeatures); } @@ -279,10 +292,9 @@ def getNonRecordFormOpcode : InstrMapping { def getAltVSXFMAOpcode : InstrMapping { let FilterClass = "AltVSXFMARel"; - // Instructions with the same BaseName and Interpretation64Bit values - // form a row. + // Instructions with the same BaseName value form a row. let RowFields = ["BaseName"]; - // Instructions with the same RC value form a column. + // Instructions with the same IsVSXFMAAlt value form a column. 
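How the mapping defined here is consumed from C++: TableGen emits an accessor in the PPC namespace that returns the column entry for a row, or -1 if there is none. A sketch patterned on the existing call site in PPCVSXFMAMutate.cpp (the helper name maybeUseAltFMA is hypothetical, and header choices are approximate; PPC::getAltVSXFMAOpcode is the generated accessor):

#include "PPCInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical helper: retarget an FMA to the alternative form recorded in
// the AltVSXFMARel table.
static bool maybeUseAltFMA(MachineInstr &MI, const PPCInstrInfo &TII) {
  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  if (AltOpc == -1)
    return false;              // this opcode has no AltVSXFMARel column
  MI.setDesc(TII.get(AltOpc)); // switch to the alternative FMA form
  return true;
}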
let ColFields = ["IsVSXFMAAlt"]; // The key column are the (default) addend-killing instructions. let KeyCol = ["0"]; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 04aa3c9b1e22..bd87ce06b4fb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1,9 +1,8 @@ //===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,7 +15,7 @@ // //===----------------------------------------------------------------------===// -#include "InstPrinter/PPCInstPrinter.h" +#include "MCTargetDesc/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" @@ -26,6 +25,7 @@ #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "PPCTargetStreamer.h" +#include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -95,68 +95,102 @@ public: return AsmPrinter::doInitialization(M); } - void EmitInstruction(const MachineInstr *MI) override; + void EmitInstruction(const MachineInstr *MI) override; + + /// This function is for PrintAsmOperand and PrintAsmMemoryOperand, + /// invoked by EmitMSInlineAsmStr and EmitGCCInlineAsmStr only. + /// The \p MI would be INLINEASM ONLY. 
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + + void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + + void EmitEndOfAsmFile(Module &M) override; + + void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI); + void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI); + void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget<PPCSubtarget>(); + bool Changed = AsmPrinter::runOnMachineFunction(MF); + emitXRayTable(); + return Changed; + } +}; - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); +/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux +class PPCLinuxAsmPrinter : public PPCAsmPrinter { +public: + explicit PPCLinuxAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : PPCAsmPrinter(TM, std::move(Streamer)) {} - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) override; - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) override; + StringRef getPassName() const override { + return "Linux PPC Assembly Printer"; + } - void EmitEndOfAsmFile(Module &M) override; + bool doFinalization(Module &M) override; + void EmitStartOfAsmFile(Module &M) override; - void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI); - void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI); - void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); - bool runOnMachineFunction(MachineFunction &MF) override { - Subtarget = &MF.getSubtarget<PPCSubtarget>(); - bool Changed = AsmPrinter::runOnMachineFunction(MF); - emitXRayTable(); - return Changed; - } - }; + void EmitFunctionEntryLabel() override; - /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux - class PPCLinuxAsmPrinter : public PPCAsmPrinter { - public: - explicit PPCLinuxAsmPrinter(TargetMachine &TM, - std::unique_ptr<MCStreamer> Streamer) - : PPCAsmPrinter(TM, std::move(Streamer)) {} + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; + void EmitInstruction(const MachineInstr *MI) override; +}; - StringRef getPassName() const override { - return "Linux PPC Assembly Printer"; - } +/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac +/// OS X +class PPCDarwinAsmPrinter : public PPCAsmPrinter { +public: + explicit PPCDarwinAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : PPCAsmPrinter(TM, std::move(Streamer)) {} - bool doFinalization(Module &M) override; - void EmitStartOfAsmFile(Module &M) override; + StringRef getPassName() const override { + return "Darwin PPC Assembly Printer"; + } - void EmitFunctionEntryLabel() override; + bool doFinalization(Module &M) override; + void EmitStartOfAsmFile(Module &M) override; +}; - void EmitFunctionBodyStart() override; - void EmitFunctionBodyEnd() override; - void EmitInstruction(const MachineInstr *MI) override; - }; +class PPCAIXAsmPrinter : public PPCAsmPrinter { +public: + PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : PPCAsmPrinter(TM, 
std::move(Streamer)) {} - /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac - /// OS X - class PPCDarwinAsmPrinter : public PPCAsmPrinter { - public: - explicit PPCDarwinAsmPrinter(TargetMachine &TM, - std::unique_ptr<MCStreamer> Streamer) - : PPCAsmPrinter(TM, std::move(Streamer)) {} + StringRef getPassName() const override { return "AIX PPC Assembly Printer"; } +}; - StringRef getPassName() const override { - return "Darwin PPC Assembly Printer"; - } +} // end anonymous namespace - bool doFinalization(Module &M) override; - void EmitStartOfAsmFile(Module &M) override; - }; +void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO, + raw_ostream &O) { + // Computing the address of a global symbol, not calling it. + const GlobalValue *GV = MO.getGlobal(); + MCSymbol *SymToPrint; + + // External or weakly linked global variables need non-lazily-resolved stubs + if (Subtarget->hasLazyResolverStub(GV)) { + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry( + SymToPrint); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), + !GV->hasInternalLinkage()); + } else { + SymToPrint = getSymbol(GV); + } -} // end anonymous namespace + SymToPrint->print(O, MAI); + + printOffset(MO.getOffset(), O); +} void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { @@ -165,10 +199,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, switch (MO.getType()) { case MachineOperand::MO_Register: { - unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(), - MO.getReg(), OpNo); - - const char *RegName = PPCInstPrinter::getRegisterName(Reg); + // The MI is INLINEASM ONLY and UseVSXReg is always false. + const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg()); // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? @@ -192,26 +224,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { - // Computing the address of a global symbol, not calling it. - const GlobalValue *GV = MO.getGlobal(); - MCSymbol *SymToPrint; - - // External or weakly linked global variables need non-lazily-resolved stubs - if (Subtarget->hasLazyResolverStub(GV)) { - SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry( - SymToPrint); - if (!StubSym.getPointer()) - StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), - !GV->hasInternalLinkage()); - } else { - SymToPrint = getSymbol(GV); - } - - SymToPrint->print(O, MAI); - - printOffset(MO.getOffset(), O); + PrintSymbolOperand(MO, O); return; } @@ -224,7 +237,6 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? 
if (ExtraCode && ExtraCode[0]) { @@ -233,9 +245,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, switch (ExtraCode[0]) { default: // See if this is a generic print operand - return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); - case 'c': // Don't print "$" before a global var name or constant. - break; // PPC never has a prefix. + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); case 'L': // Write second word of DImode reference. // Verify that this operand has two consecutive registers. if (!MI->getOperand(OpNo).isReg() || @@ -277,7 +287,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, // assembler operand. bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) { @@ -460,6 +469,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, StringRef Name = "__tls_get_addr"; MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name); MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + const Module *M = MF->getFunction().getParent(); assert(MI->getOperand(0).isReg() && ((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) || @@ -473,8 +483,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, if (!Subtarget->isPPC64() && !Subtarget->isDarwin() && isPositionIndependent()) Kind = MCSymbolRefExpr::VK_PLT; - const MCSymbolRefExpr *TlsRef = + const MCExpr *TlsRef = MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext); + + // Add 32768 offset to the symbol so we follow up the latest GOT/PLT ABI. + if (Kind == MCSymbolRefExpr::VK_PLT && Subtarget->isSecurePlt() && + M->getPICLevel() == PICLevel::BigPIC) + TlsRef = MCBinaryExpr::createAdd( + TlsRef, MCConstantExpr::create(32768, OutContext), OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -576,34 +592,30 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Into: lwz %rt, .L0$poff - .L0$pb(%ri) // add %rd, %rt, %ri // or into (if secure plt mode is on): - // addis r30, r30, .LTOC - .L0$pb@ha - // addi r30, r30, .LTOC - .L0$pb@l + // addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha + // addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l // Get the offset from the GOT Base Register to the GOT LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); if (Subtarget->isSecurePlt() && isPositionIndependent() ) { unsigned PICR = TmpInst.getOperand(0).getReg(); - MCSymbol *LTOCSymbol = OutContext.getOrCreateSymbol(StringRef(".LTOC")); + MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol( + M->getPICLevel() == PICLevel::SmallPIC ? 
"_GLOBAL_OFFSET_TABLE_" + : ".LTOC"); const MCExpr *PB = - MCSymbolRefExpr::create(MF->getPICBaseSymbol(), - OutContext); + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); - const MCExpr *LTOCDeltaExpr = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(LTOCSymbol, OutContext), - PB, OutContext); + const MCExpr *DeltaExpr = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext); - const MCExpr *LTOCDeltaHi = - PPCMCExpr::createHa(LTOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) - .addReg(PICR) - .addReg(PICR) - .addExpr(LTOCDeltaHi)); + const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext); + EmitToStreamer( + *OutStreamer, + MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi)); - const MCExpr *LTOCDeltaLo = - PPCMCExpr::createLo(LTOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) - .addReg(PICR) - .addReg(PICR) - .addExpr(LTOCDeltaLo)); + const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext); + EmitToStreamer( + *OutStreamer, + MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo)); return; } else { MCSymbol *PICOffset = @@ -1640,6 +1652,9 @@ createPPCAsmPrinterPass(TargetMachine &tm, std::unique_ptr<MCStreamer> &&Streamer) { if (tm.getTargetTriple().isMacOSX()) return new PPCDarwinAsmPrinter(tm, std::move(Streamer)); + if (tm.getTargetTriple().isOSAIX()) + return new PPCAIXAsmPrinter(tm, std::move(Streamer)); + return new PPCLinuxAsmPrinter(tm, std::move(Streamer)); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 55e105dad0e5..104cf2ba3c00 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -1,9 +1,8 @@ //===- PPCBoolRetToInt.cpp ------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp index bbb977f090c5..5e9a661f8f0b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -1,9 +1,8 @@ //===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -34,10 +33,6 @@ STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced"); STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged"); STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); -namespace llvm { - void initializePPCBranchCoalescingPass(PassRegistry&); -} - //===----------------------------------------------------------------------===// // PPCBranchCoalescing //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index 0d1bb9297bcb..793d690baec3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -1,9 +1,8 @@ //===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,16 +25,13 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include <algorithm> using namespace llvm; #define DEBUG_TYPE "ppc-branch-select" STATISTIC(NumExpanded, "Number of branches expanded to long format"); -namespace llvm { - void initializePPCBSelPass(PassRegistry&); -} - namespace { struct PPCBSel : public MachineFunctionPass { static char ID; @@ -48,6 +44,17 @@ namespace { // size that is due to potential padding. std::vector<std::pair<unsigned, unsigned>> BlockSizes; + // The first block number which has imprecise instruction address. + int FirstImpreciseBlock = -1; + + unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset); + unsigned ComputeBlockSizes(MachineFunction &Fn); + void modifyAdjustment(MachineFunction &Fn); + int computeBranchSize(MachineFunction &Fn, + const MachineBasicBlock *Src, + const MachineBasicBlock *Dest, + unsigned BrOffset); + bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { @@ -70,43 +77,47 @@ FunctionPass *llvm::createPPCBranchSelectionPass() { return new PPCBSel(); } -bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { - const PPCInstrInfo *TII = - static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); - // Give the blocks of the function a dense, in-order, numbering. - Fn.RenumberBlocks(); - BlockSizes.resize(Fn.getNumBlockIDs()); - - auto GetAlignmentAdjustment = - [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned { - unsigned Align = MBB.getAlignment(); - if (!Align) - return 0; - - unsigned AlignAmt = 1 << Align; - unsigned ParentAlign = MBB.getParent()->getAlignment(); - - if (Align <= ParentAlign) - return OffsetToAlignment(Offset, AlignAmt); - - // The alignment of this MBB is larger than the function's alignment, so we - // can't tell whether or not it will insert nops. Assume that it will. 
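A quick numeric check of the adjustment being computed here, using the same MathExtras helper the surrounding code calls (values chosen for illustration):

#include "llvm/Support/MathExtras.h"
// A block with alignment 1 << 4 == 16 placed at running offset 0x1c needs
// OffsetToAlignment(0x1c, 16) == 4 bytes of nops to reach 0x20. If the block
// is more aligned than its function, the worst case assumed below is
// AlignAmt plus that padding, i.e. 16 + 4 == 20 bytes.
const uint64_t Pad = llvm::OffsetToAlignment(0x1c, 16); // == 4
const uint64_t WorstCase = 16 + Pad;                    // == 20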
- return AlignAmt + OffsetToAlignment(Offset, AlignAmt); - }; +/// In order to make MBB aligned, we need to add an adjustment value to the +/// original Offset. +unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB, + unsigned Offset) { + unsigned Align = MBB.getAlignment(); + if (!Align) + return 0; + + unsigned AlignAmt = 1 << Align; + unsigned ParentAlign = MBB.getParent()->getAlignment(); + + if (Align <= ParentAlign) + return OffsetToAlignment(Offset, AlignAmt); + + // The alignment of this MBB is larger than the function's alignment, so we + // can't tell whether or not it will insert nops. Assume that it will. + if (FirstImpreciseBlock < 0) + FirstImpreciseBlock = MBB.getNumber(); + return AlignAmt + OffsetToAlignment(Offset, AlignAmt); +} - // We need to be careful about the offset of the first block in the function - // because it might not have the function's alignment. This happens because, - // under the ELFv2 ABI, for functions which require a TOC pointer, we add a - // two-instruction sequence to the start of the function. - // Note: This needs to be synchronized with the check in - // PPCLinuxAsmPrinter::EmitFunctionBodyStart. +/// We need to be careful about the offset of the first block in the function +/// because it might not have the function's alignment. This happens because, +/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a +/// two-instruction sequence to the start of the function. +/// Note: This needs to be synchronized with the check in +/// PPCLinuxAsmPrinter::EmitFunctionBodyStart. +static inline unsigned GetInitialOffset(MachineFunction &Fn) { unsigned InitialOffset = 0; if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() && !Fn.getRegInfo().use_empty(PPC::X2)) InitialOffset = 8; + return InitialOffset; +} + +/// Measure each MBB and compute a size for the entire function. +unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) { + const PPCInstrInfo *TII = + static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + unsigned FuncSize = GetInitialOffset(Fn); - // Measure each MBB and compute a size for the entire function. - unsigned FuncSize = InitialOffset; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock *MBB = &*MFI; @@ -124,13 +135,145 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { } unsigned BlockSize = 0; - for (MachineInstr &MI : *MBB) + for (MachineInstr &MI : *MBB) { BlockSize += TII->getInstSizeInBytes(MI); + if (MI.isInlineAsm() && (FirstImpreciseBlock < 0)) + FirstImpreciseBlock = MBB->getNumber(); + } BlockSizes[MBB->getNumber()].first = BlockSize; FuncSize += BlockSize; } + return FuncSize; +} + +/// Modify the basic block align adjustment. +void PPCBSel::modifyAdjustment(MachineFunction &Fn) { + unsigned Offset = GetInitialOffset(Fn); + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock *MBB = &*MFI; + + if (MBB->getNumber() > 0) { + auto &BS = BlockSizes[MBB->getNumber()-1]; + BS.first -= BS.second; + Offset -= BS.second; + + unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset); + + BS.first += AlignExtra; + BS.second = AlignExtra; + + Offset += AlignExtra; + } + + Offset += BlockSizes[MBB->getNumber()].first; + } +} + +/// Determine the offset from the branch in Src block to the Dest block. +/// BrOffset is the offset of the branch instruction inside Src block. 
+int PPCBSel::computeBranchSize(MachineFunction &Fn, + const MachineBasicBlock *Src, + const MachineBasicBlock *Dest, + unsigned BrOffset) { + int BranchSize; + unsigned MaxAlign = 2; + bool NeedExtraAdjustment = false; + if (Dest->getNumber() <= Src->getNumber()) { + // If this is a backwards branch, the delta is the offset from the + // start of this block to this branch, plus the sizes of all blocks + // from this block to the dest. + BranchSize = BrOffset; + MaxAlign = std::max(MaxAlign, Src->getAlignment()); + + int DestBlock = Dest->getNumber(); + BranchSize += BlockSizes[DestBlock].first; + for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) { + BranchSize += BlockSizes[i].first; + MaxAlign = std::max(MaxAlign, + Fn.getBlockNumbered(i)->getAlignment()); + } + + NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && + (DestBlock >= FirstImpreciseBlock); + } else { + // Otherwise, add the size of the blocks between this block and the + // dest to the number of bytes left in this block. + unsigned StartBlock = Src->getNumber(); + BranchSize = BlockSizes[StartBlock].first - BrOffset; + + MaxAlign = std::max(MaxAlign, Dest->getAlignment()); + for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) { + BranchSize += BlockSizes[i].first; + MaxAlign = std::max(MaxAlign, + Fn.getBlockNumbered(i)->getAlignment()); + } + + NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && + (Src->getNumber() >= FirstImpreciseBlock); + } + + // We tend to over estimate code size due to large alignment and + // inline assembly. Usually it causes larger computed branch offset. + // But sometimes it may also causes smaller computed branch offset + // than actual branch offset. If the offset is close to the limit of + // encoding, it may cause problem at run time. + // Following is a simplified example. + // + // actual estimated + // address address + // ... + // bne Far 100 10c + // .p2align 4 + // Near: 110 110 + // ... + // Far: 8108 8108 + // + // Actual offset: 0x8108 - 0x100 = 0x8008 + // Computed offset: 0x8108 - 0x10c = 0x7ffc + // + // This example also shows when we can get the largest gap between + // estimated offset and actual offset. If there is an aligned block + // ABB between branch and target, assume its alignment is <align> + // bits. Now consider the accumulated function size FSIZE till the end + // of previous block PBB. If the estimated FSIZE is multiple of + // 2^<align>, we don't need any padding for the estimated address of + // ABB. If actual FSIZE at the end of PBB is 4 bytes more than + // multiple of 2^<align>, then we need (2^<align> - 4) bytes of + // padding. It also means the actual branch offset is (2^<align> - 4) + // larger than computed offset. Other actual FSIZE needs less padding + // bytes, so causes smaller gap between actual and computed offset. + // + // On the other hand, if the inline asm or large alignment occurs + // between the branch block and destination block, the estimated address + // can be <delta> larger than actual address. If padding bytes are + // needed for a later aligned block, the actual number of padding bytes + // is at most <delta> more than estimated padding bytes. So the actual + // aligned block address is less than or equal to the estimated aligned + // block address. So the actual branch offset is less than or equal to + // computed branch offset. + // + // The computed offset is at most ((1 << alignment) - 4) bytes smaller + // than actual offset. So we add this number to the offset for safety. 
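Plugging the example's numbers back into that bound (arithmetic only; the figures are the ones quoted in the comment above):

// bne estimated at 0x10c but actually at 0x100, target Far at 0x8108:
//   actual offset   = 0x8108 - 0x100 = 0x8008
//   computed offset = 0x8108 - 0x10c = 0x7ffc
// The gap is 0xc == 12 == (1 << 4) - 4, exactly the margin added below.
static_assert((0x8108 - 0x100) - (0x8108 - 0x10c) == (1 << 4) - 4,
              "safety margin covers the worst-case alignment gap");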
+ if (NeedExtraAdjustment) + BranchSize += (1 << MaxAlign) - 4; + + return BranchSize; +} + +bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { + const PPCInstrInfo *TII = + static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + // Give the blocks of the function a dense, in-order, numbering. + Fn.RenumberBlocks(); + BlockSizes.resize(Fn.getNumBlockIDs()); + FirstImpreciseBlock = -1; + + // Measure each MBB and compute a size for the entire function. + unsigned FuncSize = ComputeBlockSizes(Fn); + // If the entire function is smaller than the displacement of a branch field, // we know we don't need to shrink any branches in this function. This is a // common case. @@ -178,23 +321,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // Determine the offset from the current branch to the destination // block. - int BranchSize; - if (Dest->getNumber() <= MBB.getNumber()) { - // If this is a backwards branch, the delta is the offset from the - // start of this block to this branch, plus the sizes of all blocks - // from this block to the dest. - BranchSize = MBBStartOffset; - - for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) - BranchSize += BlockSizes[i].first; - } else { - // Otherwise, add the size of the blocks between this block and the - // dest to the number of bytes left in this block. - BranchSize = -MBBStartOffset; - - for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i) - BranchSize += BlockSizes[i].first; - } + int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset); // If this branch is in range, ignore it. if (isInt<16>(BranchSize)) { @@ -253,26 +380,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { if (MadeChange) { // If we're going to iterate again, make sure we've updated our // padding-based contributions to the block sizes. - unsigned Offset = InitialOffset; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock *MBB = &*MFI; - - if (MBB->getNumber() > 0) { - auto &BS = BlockSizes[MBB->getNumber()-1]; - BS.first -= BS.second; - Offset -= BS.second; - - unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset); - - BS.first += AlignExtra; - BS.second = AlignExtra; - - Offset += AlignExtra; - } - - Offset += BlockSizes[MBB->getNumber()].first; - } + modifyAdjustment(Fn); } EverMadeChange |= MadeChange; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp index 5510a95430f5..5116f0d121f4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp @@ -1,9 +1,8 @@ //===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCCState.h b/contrib/llvm/lib/Target/PowerPC/PPCCCState.h index 9be9f11dbea3..e3499597474c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCCState.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCCCState.h @@ -1,9 +1,8 @@ //===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 6b9e2383e36f..2b8d9b87724f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -1,9 +1,8 @@ //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -72,70 +71,7 @@ using namespace llvm; static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif -// The latency of mtctr is only justified if there are more than 4 -// comparisons that will be removed as a result. 
-static cl::opt<unsigned> -SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, - cl::desc("Loops with a constant trip count smaller than " - "this value will not use the count register.")); - -STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); - -namespace llvm { - void initializePPCCTRLoopsPass(PassRegistry&); -#ifndef NDEBUG - void initializePPCCTRLoopsVerifyPass(PassRegistry&); -#endif -} - namespace { - struct PPCCTRLoops : public FunctionPass { - -#ifndef NDEBUG - static int Counter; -#endif - - public: - static char ID; - - PPCCTRLoops() : FunctionPass(ID) { - initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - - private: - bool mightUseCTR(BasicBlock *BB); - bool convertToCTRLoop(Loop *L); - - private: - const PPCTargetMachine *TM; - const PPCSubtarget *STI; - const PPCTargetLowering *TLI; - const DataLayout *DL; - const TargetLibraryInfo *LibInfo; - const TargetTransformInfo *TTI; - LoopInfo *LI; - ScalarEvolution *SE; - DominatorTree *DT; - bool PreserveLCSSA; - TargetSchedModel SchedModel; - }; - - char PPCCTRLoops::ID = 0; -#ifndef NDEBUG - int PPCCTRLoops::Counter = 0; -#endif #ifndef NDEBUG struct PPCCTRLoopsVerify : public MachineFunctionPass { @@ -161,16 +97,6 @@ namespace { #endif // NDEBUG } // end anonymous namespace -INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", - false, false) - -FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } - #ifndef NDEBUG INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", "PowerPC CTR Loops Verify", false, false) @@ -183,511 +109,6 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { } #endif // NDEBUG -bool PPCCTRLoops::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); - if (!TPC) - return false; - - TM = &TPC->getTM<PPCTargetMachine>(); - STI = TM->getSubtargetImpl(F); - TLI = STI->getTargetLowering(); - - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DL = &F.getParent()->getDataLayout(); - auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - LibInfo = TLIP ? &TLIP->getTLI() : nullptr; - PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - - bool MadeChange = false; - - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - if (!L->getParentLoop()) - MadeChange |= convertToCTRLoop(L); - } - - return MadeChange; -} - -static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - return ITy->getBitWidth() > (Is32Bit ? 
32U : 64U); - - return false; -} - -// Determining the address of a TLS variable results in a function call in -// certain TLS models. -static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { - const auto *GV = dyn_cast<GlobalValue>(MemAddr); - if (!GV) { - // Recurse to check for constants that refer to TLS global variables. - if (const auto *CV = dyn_cast<Constant>(MemAddr)) - for (const auto &CO : CV->operands()) - if (memAddrUsesCTR(TM, CO)) - return true; - - return false; - } - - if (!GV->isThreadLocal()) - return false; - TLSModel::Model Model = TM.getTLSModel(GV); - return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; -} - -// Loop through the inline asm constraints and look for something that clobbers -// ctr. -static bool asmClobbersCTR(InlineAsm *IA) { - InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); - for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { - InlineAsm::ConstraintInfo &C = CIV[i]; - if (C.Type != InlineAsm::isInput) - for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) - if (StringRef(C.Codes[j]).equals_lower("{ctr}")) - return true; - } - return false; -} - -bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); - J != JE; ++J) { - if (CallInst *CI = dyn_cast<CallInst>(J)) { - // Inline ASM is okay, unless it clobbers the ctr register. - if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { - if (asmClobbersCTR(IA)) - return true; - continue; - } - - if (Function *F = CI->getCalledFunction()) { - // Most intrinsics don't become function calls, but some might. - // sin, cos, exp and log are always calls. - unsigned Opcode = 0; - if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { - switch (F->getIntrinsicID()) { - default: continue; - // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr - // we're definitely using CTR. - case Intrinsic::ppc_is_decremented_ctr_nonzero: - case Intrinsic::ppc_mtctr: - return true; - -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - - case Intrinsic::setjmp: - -#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) - // let's return it to _setjmp state -# pragma pop_macro("setjmp") -# undef setjmp_undefined_for_msvc -#endif - - case Intrinsic::longjmp: - - // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp - // because, although it does clobber the counter register, the - // control can't then return to inside the loop unless there is also - // an eh_sjlj_setjmp. - case Intrinsic::eh_sjlj_setjmp: - - case Intrinsic::memcpy: - case Intrinsic::memmove: - case Intrinsic::memset: - case Intrinsic::powi: - case Intrinsic::log: - case Intrinsic::log2: - case Intrinsic::log10: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::pow: - case Intrinsic::sin: - case Intrinsic::cos: - return true; - case Intrinsic::copysign: - if (CI->getArgOperand(0)->getType()->getScalarType()-> - isPPC_FP128Ty()) - return true; - else - continue; // ISD::FCOPYSIGN is never a library call. 
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; - case Intrinsic::floor: Opcode = ISD::FFLOOR; break; - case Intrinsic::ceil: Opcode = ISD::FCEIL; break; - case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; - case Intrinsic::rint: Opcode = ISD::FRINT; break; - case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; - case Intrinsic::round: Opcode = ISD::FROUND; break; - case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; - case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; - case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; - case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; - } - } - - // PowerPC does not use [US]DIVREM or other library calls for - // operations on regular types which are not otherwise library calls - // (i.e. soft float or atomics). If adapting for targets that do, - // additional care is required here. - - LibFunc Func; - if (!F->hasLocalLinkage() && F->hasName() && LibInfo && - LibInfo->getLibFunc(F->getName(), Func) && - LibInfo->hasOptimizedCodeGen(Func)) { - // Non-read-only functions are never treated as intrinsics. - if (!CI->onlyReadsMemory()) - return true; - - // Conversion happens only for FP calls. - if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) - return true; - - switch (Func) { - default: return true; - case LibFunc_copysign: - case LibFunc_copysignf: - continue; // ISD::FCOPYSIGN is never a library call. - case LibFunc_copysignl: - return true; - case LibFunc_fabs: - case LibFunc_fabsf: - case LibFunc_fabsl: - continue; // ISD::FABS is never a library call. - case LibFunc_sqrt: - case LibFunc_sqrtf: - case LibFunc_sqrtl: - Opcode = ISD::FSQRT; break; - case LibFunc_floor: - case LibFunc_floorf: - case LibFunc_floorl: - Opcode = ISD::FFLOOR; break; - case LibFunc_nearbyint: - case LibFunc_nearbyintf: - case LibFunc_nearbyintl: - Opcode = ISD::FNEARBYINT; break; - case LibFunc_ceil: - case LibFunc_ceilf: - case LibFunc_ceill: - Opcode = ISD::FCEIL; break; - case LibFunc_rint: - case LibFunc_rintf: - case LibFunc_rintl: - Opcode = ISD::FRINT; break; - case LibFunc_round: - case LibFunc_roundf: - case LibFunc_roundl: - Opcode = ISD::FROUND; break; - case LibFunc_trunc: - case LibFunc_truncf: - case LibFunc_truncl: - Opcode = ISD::FTRUNC; break; - case LibFunc_fmin: - case LibFunc_fminf: - case LibFunc_fminl: - Opcode = ISD::FMINNUM; break; - case LibFunc_fmax: - case LibFunc_fmaxf: - case LibFunc_fmaxl: - Opcode = ISD::FMAXNUM; break; - } - } - - if (Opcode) { - EVT EVTy = - TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true); - - if (EVTy == MVT::Other) - return true; - - if (TLI->isOperationLegalOrCustom(Opcode, EVTy)) - continue; - else if (EVTy.isVector() && - TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType())) - continue; - - return true; - } - } - - return true; - } else if (isa<BinaryOperator>(J) && - J->getType()->getScalarType()->isPPC_FP128Ty()) { - // Most operations on ppc_f128 values become calls. 
- return true; - } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || - isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { - CastInst *CI = cast<CastInst>(J); - if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || - CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) - return true; - } else if (isLargeIntegerTy(!TM->isPPC64(), - J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::UDiv || - J->getOpcode() == Instruction::SDiv || - J->getOpcode() == Instruction::URem || - J->getOpcode() == Instruction::SRem)) { - return true; - } else if (!TM->isPPC64() && - isLargeIntegerTy(false, J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::Shl || - J->getOpcode() == Instruction::AShr || - J->getOpcode() == Instruction::LShr)) { - // Only on PPC32, for 128-bit integers (specifically not 64-bit - // integers), these might be runtime calls. - return true; - } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { - // On PowerPC, indirect jumps use the counter register. - return true; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) - return true; - } - - // FREM is always a call. - if (J->getOpcode() == Instruction::FRem) - return true; - - if (STI->useSoftFloat()) { - switch(J->getOpcode()) { - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FCmp: - return true; - } - } - - for (Value *Operand : J->operands()) - if (memAddrUsesCTR(*TM, Operand)) - return true; - } - - return false; -} -bool PPCCTRLoops::convertToCTRLoop(Loop *L) { - bool MadeChange = false; - - // Do not convert small short loops to CTR loop. - unsigned ConstTripCount = SE->getSmallConstantTripCount(L); - if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { - SmallPtrSet<const Value *, 32> EphValues; - auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( - *L->getHeader()->getParent()); - CodeMetrics::collectEphemeralValues(L, &AC, EphValues); - CodeMetrics Metrics; - for (BasicBlock *BB : L->blocks()) - Metrics.analyzeBasicBlock(BB, *TTI, EphValues); - // 6 is an approximate latency for the mtctr instruction. - if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) - return false; - } - - // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - MadeChange |= convertToCTRLoop(*I); - LLVM_DEBUG(dbgs() << "Nested loop converted\n"); - } - - // If a nested loop has been converted, then we can't convert this loop. - if (MadeChange) - return MadeChange; - - // Bail out if the loop has irreducible control flow. - LoopBlocksRPO RPOT(L); - RPOT.perform(LI); - if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) - return false; - -#ifndef NDEBUG - // Stop trying after reaching the limit (if any). - int Limit = CTRLoopLimit; - if (Limit >= 0) { - if (Counter >= CTRLoopLimit) - return false; - Counter++; - } -#endif - - // We don't want to spill/restore the counter register, and so we don't - // want to use the counter register if the loop contains calls. 
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I)
- if (mightUseCTR(*I))
- return MadeChange;
-
- SmallVector<BasicBlock*, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
-
- // If there is an exit edge known to be frequently taken,
- // we should not transform this loop.
- for (auto &BB : ExitingBlocks) {
- Instruction *TI = BB->getTerminator();
- if (!TI) continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- uint64_t TrueWeight = 0, FalseWeight = 0;
- if (!BI->isConditional() ||
- !BI->extractProfMetadata(TrueWeight, FalseWeight))
- continue;
-
- // If the exit path is more frequent than the loop path,
- // we return here without further analysis for this loop.
- bool TrueIsExit = !L->contains(BI->getSuccessor(0));
- if (( TrueIsExit && FalseWeight < TrueWeight) ||
- (!TrueIsExit && FalseWeight > TrueWeight))
- return MadeChange;
- }
- }
-
- BasicBlock *CountedExitBlock = nullptr;
- const SCEV *ExitCount = nullptr;
- BranchInst *CountedExitBranch = nullptr;
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- IE = ExitingBlocks.end(); I != IE; ++I) {
- const SCEV *EC = SE->getExitCount(L, *I);
- LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
- << (*I)->getName() << ": " << *EC << "\n");
- if (isa<SCEVCouldNotCompute>(EC))
- continue;
- if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
- if (ConstEC->getValue()->isZero())
- continue;
- } else if (!SE->isLoopInvariant(EC, L))
- continue;
-
- if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
- continue;
-
- // If this exiting block is contained in a nested loop, it is not eligible
- // for insertion of the branch-and-decrement since the inner loop would
- // end up messing up the value in the CTR.
- if (LI->getLoopFor(*I) != L)
- continue;
-
- // We now have a loop-invariant count of loop iterations (which is not the
- // constant zero) for which we know that this loop will not exit via this
- // exiting block.
-
- // We need to make sure that this block will run on every loop iteration.
- // For this to be true, we must dominate all blocks with backedges. Such
- // blocks are in-loop predecessors to the header block.
- bool NotAlways = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
- if (!L->contains(*PI))
- continue;
-
- if (!DT->dominates(*I, *PI)) {
- NotAlways = true;
- break;
- }
- }
-
- if (NotAlways)
- continue;
-
- // Make sure this block ends with a conditional branch.
- Instruction *TI = (*I)->getTerminator();
- if (!TI)
- continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (!BI->isConditional())
- continue;
-
- CountedExitBranch = BI;
- } else
- continue;
-
- // Note that this block may not be the loop latch block, even if the loop
- // has a latch block.
- CountedExitBlock = *I;
- ExitCount = EC;
- break;
- }
-
- if (!CountedExitBlock)
- return MadeChange;
-
- BasicBlock *Preheader = L->getLoopPreheader();
-
- // If we don't have a preheader, then insert one. If we already have a
- // preheader, then we can use it (except if the preheader contains a use of
- // the CTR register because some such uses might be reordered by the
- // selection DAG after the mtctr instruction).
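The dominance requirement a few lines up ("we must dominate all blocks with backedges") reduces to a small predicate. A hedged sketch under a hypothetical helper name:

static bool runsOnEveryIteration(BasicBlock *Exiting, Loop *L,
                                 DominatorTree *DT) {
  // Only in-loop predecessors of the header are backedge sources; blocks
  // outside the loop reach the header through the preheader instead.
  for (BasicBlock *Pred : predecessors(L->getHeader()))
    if (L->contains(Pred) && !DT->dominates(Exiting, Pred))
      return false;
  return true;
}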
- if (!Preheader || mightUseCTR(Preheader)) - Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - if (!Preheader) - return MadeChange; - - LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() - << "\n"); - - // Insert the count into the preheader and replace the condition used by the - // selected branch. - MadeChange = true; - - SCEVExpander SCEVE(*SE, *DL, "loopcnt"); - LLVMContext &C = SE->getContext(); - Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); - if (!ExitCount->getType()->isPointerTy() && - ExitCount->getType() != CountType) - ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); - ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); - Value *ECValue = - SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); - - IRBuilder<> CountBuilder(Preheader->getTerminator()); - Module *M = Preheader->getParent()->getParent(); - Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, - CountType); - CountBuilder.CreateCall(MTCTRFunc, ECValue); - - IRBuilder<> CondBuilder(CountedExitBranch); - Value *DecFunc = - Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); - Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); - Value *OldCond = CountedExitBranch->getCondition(); - CountedExitBranch->setCondition(NewCond); - - // The false branch must exit the loop. - if (!L->contains(CountedExitBranch->getSuccessor(0))) - CountedExitBranch->swapSuccessors(); - - // The old condition may be dead now, and may have even created a dead PHI - // (the original induction variable). - RecursivelyDeleteTriviallyDeadInstructions(OldCond); - // Run through the basic blocks of the loop and see if any of them have dead - // PHIs that can be removed. - for (auto I : L->blocks()) - DeleteDeadPHIs(I); - - ++NumCTRLoops; - return MadeChange; -} - #ifndef NDEBUG static bool clobbersCTR(const MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp new file mode 100644 index 000000000000..77cdf5c939dc --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.cpp @@ -0,0 +1,162 @@ +//===-- PPCCallingConv.h - --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PPCRegisterInfo.h" +#include "PPCCallingConv.h" +#include "PPCSubtarget.h" +#include "PPCCCState.h" +using namespace llvm; + +inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + llvm_unreachable("The AnyReg calling convention is only supported by the " \ + "stackmap and patchpoint intrinsics."); + // gracefully fallback to PPC C calling convention on Release builds. 
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+ int RegsLeft = NumArgRegs - RegNum;
+
+ // Skip if there are not enough registers left for the long double type (4
+ // gpr regs in soft float mode) and put the long double argument on the stack.
+ if (RegNum != NumArgRegs && RegsLeft < 4) {
+ for (int i = 0; i < RegsLeft; i++) {
+ State.AllocateReg(ArgRegs[RegNum + i]);
+ }
+ }
+
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // If there is only one floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+// Split F64 arguments into two 32-bit consecutive registers.
+static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
+ static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
+
+ // Try to get the first register.
+ unsigned Reg = State.AllocateReg(HiRegList); + if (!Reg) + return false; + + unsigned i; + for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i) + if (HiRegList[i] == Reg) + break; + + unsigned T = State.AllocateReg(LoRegList[i]); + (void)T; + assert(T == LoRegList[i] && "Could not allocate register"); + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +// Same as above, but for return values, so only allocate for R3 and R4 +static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const MCPhysReg HiRegList[] = { PPC::R3 }; + static const MCPhysReg LoRegList[] = { PPC::R4 }; + + // Try to get the first register. + unsigned Reg = State.AllocateReg(HiRegList, LoRegList); + if (!Reg) + return false; + + unsigned i; + for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +#include "PPCGenCallingConv.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h index eb904a858592..03d9be0a73d9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h @@ -1,9 +1,8 @@ //=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,14 +19,27 @@ namespace llvm { -inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &, - CCValAssign::LocInfo &, ISD::ArgFlagsTy &, - CCState &) { - llvm_unreachable("The AnyReg calling convention is only supported by the " \ - "stackmap and patchpoint intrinsics."); - // gracefully fallback to PPC C calling convention on Release builds. 
- return false; -} +bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State); +bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State); +bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State); +bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State); +bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State); +bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State); +bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); } // End llvm namespace diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index 22842d516e7d..369b9ce1a711 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -1,9 +1,8 @@ //===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -46,6 +45,7 @@ def RetCC_PPC64_AnyReg : CallingConv<[ ]>; // Return-value convention for PowerPC coldcc. +let Entry = 1 in def RetCC_PPC_Cold : CallingConv<[ // Use the same return registers as RetCC_PPC, but limited to only // one return value. The remaining return values will be saved to @@ -70,6 +70,7 @@ def RetCC_PPC_Cold : CallingConv<[ ]>; // Return-value convention for PowerPC +let Entry = 1 in def RetCC_PPC : CallingConv<[ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, @@ -90,7 +91,7 @@ def RetCC_PPC : CallingConv<[ CCIfSubtarget<"hasSPE()", CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, CCIfSubtarget<"hasSPE()", - CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, + CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>, // For P9, f128 are passed in vector registers. CCIfType<[f128], @@ -126,6 +127,7 @@ def CC_PPC64_AnyReg : CallingConv<[ // Simple calling convention for 64-bit ELF PowerPC fast isel. // Only handle ints and floats. All ints are promoted to i64. // Vector types and quadword ints are not handled. +let Entry = 1 in def CC_PPC64_ELF_FIS : CallingConv<[ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>, @@ -141,6 +143,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[ // All small ints are promoted to i64. Vector types, quadword ints, // and multiple register returns are "supported" to avoid compile // errors, but none are handled by the fast selector. 
+let Entry = 1 in def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, @@ -179,6 +182,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ CCIfType<[i32], CCIfSplit<CCIfNotSubtarget<"useSoftFloat()", CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>, + CCIfType<[f64], + CCIfSubtarget<"hasSPE()", + CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, CCIfSplit<CCIfSubtarget<"useSoftFloat()", CCIfOrigArgWasPPCF128<CCCustom< "CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>, @@ -199,7 +205,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, CCIfType<[f64], CCIfSubtarget<"hasSPE()", - CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, + CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>, CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, @@ -228,12 +234,14 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // This calling convention puts vector arguments always on the stack. It is used // to assign vector arguments which belong to the variable portion of the // parameter list of a variable argument function. +let Entry = 1 in def CC_PPC32_SVR4_VarArg : CallingConv<[ CCDelegateTo<CC_PPC32_SVR4_Common> ]>; // In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to // put vector arguments in vector registers before putting them on the stack. +let Entry = 1 in def CC_PPC32_SVR4 : CallingConv<[ // QPX vectors mirror the scalar FP convention. CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()", @@ -265,6 +273,7 @@ def CC_PPC32_SVR4 : CallingConv<[ // The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are // not passed by value. +let Entry = 1 in def CC_PPC32_SVR4_ByVal : CallingConv<[ CCIfByVal<CCPassByVal<4, 4>>, @@ -300,6 +309,13 @@ def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>; def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>; +def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X30, X31, F14, F15, F16, F17, F18, @@ -316,6 +332,13 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, F27, F28, F29, F30, F31, CR2, CR3, CR4 )>; +def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + // CSRs that are handled by prologue, epilogue. def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>; @@ -343,15 +366,22 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; // and value may be altered by inter-library calls. // Do not include r12 as it is used as a scratch register. // Do not include return registers r3, f1, v2. 
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10), - (sequence "R%u", 14, 31), - F0, (sequence "F%u", 2, 31), - (sequence "CR%u", 0, 7))>; +def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10), + (sequence "R%u", 14, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common, + F0, (sequence "F%u", 2, 31))>; + def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC, (sequence "V%u", 0, 1), (sequence "V%u", 3, 31))>; +def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common, + (sequence "S%u", 4, 10), + (sequence "S%u", 14, 31))>; + def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10), (sequence "X%u", 14, 31), F0, (sequence "F%u", 2, 31), diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp index ac931f7d0ec0..aa5d830b549e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -1,9 +1,8 @@ //===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -37,10 +36,6 @@ using namespace llvm; STATISTIC(NumBCLR, "Number of early conditional returns"); STATISTIC(NumBLR, "Number of early returns"); -namespace llvm { - void initializePPCEarlyReturnPass(PassRegistry&); -} - namespace { // PPCEarlyReturn pass - For simple functions without epilogue code, move // returns up, and create conditional returns, to avoid unnecessary @@ -184,11 +179,11 @@ public: // nothing to do. if (MF.size() < 2) return Changed; - - for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + + // We can't use a range-based for loop due to clobbering the iterator. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) { MachineBasicBlock &B = *I++; - if (processBlock(B)) - Changed = true; + Changed |= processBlock(B); } return Changed; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp index a03e691ef5bb..e8ef451c7ec9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -1,9 +1,8 @@ //===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 3b2d92db78b9..264d6b590f95 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -1,9 +1,8 @@ //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -152,6 +151,14 @@ class PPCFastISel final : public FastISel { bool isVSSRCRegClass(const TargetRegisterClass *RC) const { return RC->getID() == PPC::VSSRCRegClassID; } + unsigned copyRegToRegClass(const TargetRegisterClass *ToRC, + unsigned SrcReg, unsigned Flag = 0, + unsigned SubReg = 0) { + unsigned TmpReg = createResultReg(ToRC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg); + return TmpReg; + } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg, const PPC::Predicate Pred); @@ -187,7 +194,6 @@ class PPCFastISel final : public FastISel { unsigned &NumBytes, bool IsVarArg); bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); - LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag); private: #include "PPCGenFastISel.inc" @@ -196,23 +202,6 @@ class PPCFastISel final : public FastISel { } // end anonymous namespace -#include "PPCGenCallingConv.inc" - -// Function whose sole purpose is to kill compiler warnings -// stemming from unused functions included from PPCGenCallingConv.inc. -CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) { - if (Flag == 1) - return CC_PPC32_SVR4; - else if (Flag == 2) - return CC_PPC32_SVR4_ByVal; - else if (Flag == 3) - return CC_PPC32_SVR4_VarArg; - else if (Flag == 4) - return RetCC_PPC_Cold; - else - return RetCC_PPC; -} - static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { switch (Pred) { // These are not representable with any single compare. @@ -874,7 +863,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, unsigned CmpOpc; bool NeedsExt = false; - auto RC = MRI.getRegClass(SrcReg1); + + auto RC1 = MRI.getRegClass(SrcReg1); + auto RC2 = SrcReg2 != 0 ? 
MRI.getRegClass(SrcReg2) : nullptr; + switch (SrcVT.SimpleTy) { default: return false; case MVT::f32: @@ -893,12 +885,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, } } else { CmpOpc = PPC::FCMPUS; - if (isVSSRCRegClass(RC)) { - unsigned TmpReg = createResultReg(&PPC::F4RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1); - SrcReg1 = TmpReg; - } + if (isVSSRCRegClass(RC1)) + SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1); + if (RC2 && isVSSRCRegClass(RC2)) + SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2); } break; case MVT::f64: @@ -915,7 +905,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, CmpOpc = PPC::EFDCMPGT; break; } - } else if (isVSFRCRegClass(RC)) { + } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) { CmpOpc = PPC::XSCMPUDP; } else { CmpOpc = PPC::FCMPUD; @@ -997,12 +987,17 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { // Round the result to single precision. unsigned DestReg; - + auto RC = MRI.getRegClass(SrcReg); if (PPCSubTarget->hasSPE()) { DestReg = createResultReg(&PPC::SPE4RCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD), DestReg) .addReg(SrcReg); + } else if (isVSFRCRegClass(RC)) { + DestReg = createResultReg(&PPC::VSSRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::XSRSP), DestReg) + .addReg(SrcReg); } else { DestReg = createResultReg(&PPC::F4RCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1217,21 +1212,19 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (SrcReg == 0) return false; - // Convert f32 to f64 if necessary. This is just a meaningless copy - // to get the register class right. + // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a + // meaningless copy to get the register class right. const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); - if (InRC == &PPC::F4RCRegClass) { - unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), TmpReg) - .addReg(SrcReg); - SrcReg = TmpReg; - } + if (InRC == &PPC::F4RCRegClass) + SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg); + else if (InRC == &PPC::VSSRCRegClass) + SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg); // Determine the opcode for the conversion, which takes place - // entirely within FPRs. + // entirely within FPRs or VSRs. unsigned DestReg; unsigned Opc; + auto RC = MRI.getRegClass(SrcReg); if (PPCSubTarget->hasSPE()) { DestReg = createResultReg(&PPC::GPRCRegClass); @@ -1239,6 +1232,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; else Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; + } else if (isVSFRCRegClass(RC)) { + DestReg = createResultReg(&PPC::VSFRCRegClass); + if (DstVT == MVT::i32) + Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS; + else + Opc = IsSigned ? 
PPC::XSCVDPSXDS : PPC::XSCVDPUXDS; } else { DestReg = createResultReg(&PPC::F8RCRegClass); if (DstVT == MVT::i32) @@ -1520,11 +1519,7 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte if (RetVT == CopyVT) { const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); - ResultReg = createResultReg(CpyRC); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(SourcePhysReg); + ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg); // If necessary, round the floating result to single precision. } else if (CopyVT == MVT::f64) { @@ -1537,12 +1532,9 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte // used along the fast-isel path (not lowered), and downstream logic // also doesn't like a direct subreg copy on a physical reg.) } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { - ResultReg = createResultReg(&PPC::GPRCRegClass); // Convert physical register from G8RC to GPRC. SourcePhysReg -= PPC::X0 - PPC::R0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(SourcePhysReg); + ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg); } assert(ResultReg && "ResultReg unset!"); @@ -1894,13 +1886,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) { return false; // The only interesting case is when we need to switch register classes. - if (SrcVT == MVT::i64) { - unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), - ResultReg).addReg(SrcReg, 0, PPC::sub_32); - SrcReg = ResultReg; - } + if (SrcVT == MVT::i64) + SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32); updateValueMap(I, SrcReg); return true; @@ -1977,6 +1964,13 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) { case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); case Instruction::Call: + // On AIX, call lowering uses the DAG-ISEL path currently so that the + // callee of the direct function call instruction will be mapped to the + // symbol for the function's entry point, which is distinct from the + // function descriptor symbol. The latter is the symbol whose XCOFF symbol + // name is the C-linkage name of the source level function. + if (TM.getTargetTriple().isOSAIX()) + break; return selectCall(I); case Instruction::Ret: return SelectRet(I); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 8263954994d2..ebfb1ef7f49b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1,9 +1,8 @@ //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -30,7 +29,6 @@ using namespace llvm; #define DEBUG_TYPE "framelowering" -STATISTIC(NumNoNeedForFrame, "Number of functions without frames"); STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); @@ -73,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { } static unsigned computeLinkageSize(const PPCSubtarget &STI) { - if (STI.isDarwinABI() || STI.isPPC64()) + if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64()) return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); - // SVR4 ABI: + // 32-bit SVR4 ABI: return 8; } @@ -446,12 +444,27 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); } +/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum +/// call frame size. Update the MachineFunction object with the stack size. +unsigned +PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, + bool UseEstimate) const { + unsigned NewMaxCallFrameSize = 0; + unsigned FrameSize = determineFrameLayout(MF, UseEstimate, + &NewMaxCallFrameSize); + MF.getFrameInfo().setStackSize(FrameSize); + MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); + return FrameSize; +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, - bool UpdateMF, - bool UseEstimate) const { - MachineFrameInfo &MFI = MF.getFrameInfo(); +unsigned +PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, + bool UseEstimate, + unsigned *NewMaxCallFrameSize) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); // Get the number of bytes to allocate from the FrameInfo unsigned FrameSize = @@ -469,6 +482,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. !MFI.adjustsStack() && // No calls. !MustSaveLR(MF, LR) && // No need to save LR. + !FI->mustSaveTOC() && // No need to save TOC. !RegInfo->hasBasePointer(MF); // No special alignment. // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless @@ -477,10 +491,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Check whether we can skip adjusting the stack pointer (by using red zone) if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { - NumNoNeedForFrame++; // No need for frame - if (UpdateMF) - MFI.setStackSize(0); return 0; } @@ -496,9 +507,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, if (MFI.hasVarSizedObjects()) maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - // Update maximum call frame size. - if (UpdateMF) - MFI.setMaxCallFrameSize(maxCallFrameSize); + // Update the new max call frame size if the caller passes in a valid pointer. + if (NewMaxCallFrameSize) + *NewMaxCallFrameSize = maxCallFrameSize; // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -506,10 +517,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Make sure the frame is aligned. FrameSize = (FrameSize + AlignMask) & ~AlignMask; - // Update frame info. 
- if (UpdateMF) - MFI.setStackSize(FrameSize); - return FrameSize; } @@ -690,7 +697,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MachineFunction &MF = *(MBB->getParent()); bool HasBP = RegInfo->hasBasePointer(MF); - unsigned FrameSize = determineFrameLayout(MF, false); + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; bool IsLargeFrame = !isInt<16>(NegFrameSize); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -713,6 +720,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { return findScratchRegister(TmpMBB, true); } +bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + // Abort if there is no register info or function info. + if (!RegInfo || !FI) + return false; + + // Only move the stack update on ELFv2 ABI and PPC64. + if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) + return false; + + // Check the frame size first and return false if it does not fit the + // requirements. + // We need a non-zero frame size as well as a frame that will fit in the red + // zone. This is because by moving the stack pointer update we are now storing + // to the red zone until the stack pointer is updated. If we get an interrupt + // inside the prologue but before the stack update we now have a number of + // stores to the red zone and those stores must all fit. + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned FrameSize = MFI.getStackSize(); + if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) + return false; + + // Frame pointers and base pointers complicate matters so don't do anything + // if we have them. For example having a frame pointer will sometimes require + // a copy of r1 into r31 and that makes keeping track of updates to r1 more + // difficult. + if (hasFP(MF) || RegInfo->hasBasePointer(MF)) + return false; + + // Calls to fast_cc functions use different rules for passing parameters on + // the stack from the ABI and using PIC base in the function imposes + // similar restrictions to using the base pointer. It is not generally safe + // to move the stack pointer update in these situations. + if (FI->hasFastCall() || FI->usesPICBase()) + return false; + + // Finally we can move the stack update if we do not require register + // scavenging. Register scavenging can introduce more spills and so + // may make the frame size larger than we have computed. + return !RegInfo->requiresFrameIndexScavenging(MF); +} + void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -748,7 +799,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MBBI = MBB.begin(); // Work out frame sizes. - unsigned FrameSize = determineFrameLayout(MF); + unsigned FrameSize = determineFrameLayoutAndUpdate(MF); int NegFrameSize = -FrameSize; if (!isInt<32>(NegFrameSize)) llvm_unreachable("Unhandled stack size!"); @@ -759,6 +810,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // Check if the link register (LR) must be saved. 
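The red-zone condition in stackUpdateCanBeMoved above is the crux: once the stdu is sunk below the callee-saved-register stores, each of those stores writes below the not-yet-updated r1, i.e. into the red zone (288 bytes on 64-bit ELF), so the whole frame must fit there or an interrupt arriving before the stdu could clobber the spills. A condensed sketch of just that gate; the free-standing helper name is ours:

// Prologue, update first:          update moved down:
//   stdu r1, -FS(r1)                 std  r31, -8(r1)   <- red zone
//   std  r31, FS-8(r1)               std  r30, -16(r1)  <- red zone
//   std  r30, FS-16(r1)              stdu r1, -FS(r1)
static bool frameFitsInRedZone(unsigned FrameSize, unsigned RedZoneSize) {
  // Zero-sized frames have no update to move; oversized ones would spill
  // past the area the ABI guarantees to be safe from signal handlers.
  return FrameSize != 0 && FrameSize <= RedZoneSize;
}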
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); + bool MustSaveTOC = FI->mustSaveTOC(); const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? @@ -770,6 +822,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; + unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) @@ -855,6 +908,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + // Check if we can move the stack update instruction (stdu) down the prologue + // past the callee saves. Hopefully this will avoid the situation where the + // saves are waiting for the update on the store with update to complete. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateDown = false; + + // Check if we can move the stack update. + if (stackUpdateCanBeMoved(MF)) { + const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the stack update pointer past them. + if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { + StackUpdateLoc++; + MovingStackUpdateDown = true; + } else { + // We need all of the Frame Indices to meet these conditions. + // If they do not, abort the whole operation. + StackUpdateLoc = MBBI; + MovingStackUpdateDown = false; + break; + } + } + + // If the operation was not aborted then update the object offset. + if (MovingStackUpdateDown) { + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + if (FrIdx < 0) + MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); + } + } + } + // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { @@ -918,7 +1010,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } if (MustSaveLR) - BuildMI(MBB, MBBI, dl, StoreInst) + BuildMI(MBB, StackUpdateLoc, dl, StoreInst) .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -986,7 +1078,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); @@ -1004,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } + // Save the TOC register after the stack pointer update if a prologue TOC + // save is required for the function. 
+ if (MustSaveTOC) {
+ assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+ BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+ .addReg(TOCReg, getKillRegState(true))
+ .addImm(TOCSaveOffset)
+ .addReg(SPReg);
+ }
+
 if (!HasRedZone) {
 assert(!isPPC64 && "A red zone is always available on PPC64");
 if (HasSTUX) {
@@ -1205,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
 if (PPC::CRBITRCRegClass.contains(Reg))
 continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
 // For SVR4, don't emit a move for the CR spill slot if we haven't
 // spilled CRs.
 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
@@ -1234,6 +1339,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
 .addCFIIndex(CFIRegister);
 } else {
 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1380,6 +1491,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
 unsigned RBReg = SPReg;
 unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This will allow the move to LR instruction
+ // to be executed before the restores of the callee saves, which means
+ // that the callee saves can hide the latency of the MTLR instruction.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the update of the stack pointer
+ // past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+ StackUpdateLoc--;
+ else {
+ // Abort the operation as we can't update all CSR restores.
+ StackUpdateLoc = MBBI;
+ break;
+ }
+ }
+ }
+
 if (FrameSize) {
 // In the prologue, the loaded (or persistent) stack pointer value is
 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1409,7 +1546,7 @@
 }
 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
 if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
 .addReg(SPReg)
 .addImm(FrameSize);
 } else {
@@ -1433,7 +1570,7 @@
 .addReg(FPReg);
 RBReg = FPReg;
 }
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
 .addImm(0)
 .addReg(SPReg);
 }
@@ -1466,7 +1603,7 @@
 // a base register anyway, because it may happen to be R0.
bool LoadedLR = false; if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { - BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1538,7 +1675,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization @@ -1732,6 +1869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || + (Reg != PPC::X2 && Reg != PPC::R2)) && + "Not expecting to try to spill R2 in a function that must save TOC"); if (PPC::GPRCRegClass.contains(Reg) || PPC::SPE4RCRegClass.contains(Reg)) { HasGPSaveArea = true; @@ -1947,7 +2087,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // the 16-bit immediate. We don't know the complete frame size here // because we've not yet computed callee-saved register spills or the // needed alignment padding. - unsigned StackSize = determineFrameLayout(MF, false, true); + unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { @@ -2041,6 +2181,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + bool MustSaveTOC = FI->mustSaveTOC(); DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; @@ -2071,6 +2213,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, continue; } + // The actual spill will happen in the prologue. + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) + continue; + // Insert the spill to the stack frame. if (IsCRField) { PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); @@ -2198,6 +2344,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + bool MustSaveTOC = FI->mustSaveTOC(); bool CR2Spilled = false; bool CR3Spilled = false; bool CR4Spilled = false; @@ -2220,6 +2368,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) continue; + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) + continue; + if (Reg == PPC::CR2) { CR2Spilled = true; // The spill slot is associated only with CR2, which is the diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 69bd1484d6e5..d116e9fd22e1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -1,9 +1,8 @@ //===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -13,7 +12,6 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H #define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H -#include "PPC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -73,12 +71,29 @@ class PPCFrameLowering: public TargetFrameLowering { */ void createTailCallBranchInstr(MachineBasicBlock &MBB) const; + /** + * Check if the conditions are correct to allow for the stack update + * to be moved past the CSR save/restore code. + */ + bool stackUpdateCanBeMoved(MachineFunction &MF) const; + public: PPCFrameLowering(const PPCSubtarget &STI); - unsigned determineFrameLayout(MachineFunction &MF, - bool UpdateMF = true, - bool UseEstimate = false) const; + /** + * Determine the frame layout and update the machine function. + */ + unsigned determineFrameLayoutAndUpdate(MachineFunction &MF, + bool UseEstimate = false) const; + + /** + * Determine the frame layout but do not update the machine function. + * The MachineFunction object can be const in this case as it is not + * modified. + */ + unsigned determineFrameLayout(const MachineFunction &MF, + bool UseEstimate = false, + unsigned *NewMaxCallFrameSize = nullptr) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 5f6966cecd61..391ebcc1a143 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -1,9 +1,8 @@ //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -12,9 +11,8 @@ //===----------------------------------------------------------------------===// #include "PPCHazardRecognizers.h" -#include "PPC.h" #include "PPCInstrInfo.h" -#include "PPCTargetMachine.h" +#include "PPCSubtarget.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h index 4b502147ca63..5b32147ca88d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -1,9 +1,8 @@ //===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 70e9049a2ab3..543cac075f55 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,13 +218,6 @@ namespace {
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl);
- /// SelectAddrImm - Returns true if the address N can be represented by
- /// a base register plus a signed 16-bit displacement [r+imm].
- bool SelectAddrImm(SDValue N, SDValue &Disp,
- SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
- }
-
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
/// result of a prior SelectAddressRegImm call.
@@ -239,26 +231,61 @@ namespace {
return false;
}
- /// SelectAddrIdx - Given the specified addressed, check to see if it can be
- /// represented as an indexed [r+r] operation. Returns false if it can
- /// be represented by [r+imm], which are preferred.
+ /// SelectAddrIdx - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is D.
+ /// The last parameter \p 0 means the associated D form has no requirement
+ /// for a 16-bit signed displacement.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
+ }
+
+ /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is DS.
+ /// The last parameter \p 4 means the associated DS form 16-bit signed
+ /// displacement must be a multiple of 4.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
+ bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+ }
+
+ /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is DQ.
+ /// The last parameter \p 16 means the associated DQ form 16-bit signed
+ /// displacement must be a multiple of 16.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
+ bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
}
- /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+ /// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ /// The last parameter \p 0 means the D form has no requirement for a 16-bit
+ /// signed displacement.
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+ }
/// SelectAddrImmX4 - Returns true if the address N can be represented by
- /// a base register plus a signed 16-bit displacement that is a multiple of 4.
- /// Suitable for use by STD and friends.
+ /// a base register plus a signed 16-bit displacement that is a multiple of
+ /// 4 (last parameter). Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
}
+ /// SelectAddrImmX16 - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement that is a multiple of
+ /// 16 (last parameter). Suitable for use by STXV and friends.
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
}
@@ -412,7 +439,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
if (PPCSubTarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
- if (M->getPICLevel() == PICLevel::SmallPIC) {
+ if (!PPCSubTarget->isSecurePlt() &&
+ M->getPICLevel() == PICLevel::SmallPIC) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
@@ -2373,7 +2401,7 @@ public:
// Here we try to match complex bit permutations into a set of
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
- // known to produce optimial code for common cases (like i32 byte swapping).
+ // known to produce optimal code for common cases (like i32 byte swapping).
SDNode *Select(SDNode *N) {
Memoizer.clear();
auto Result =
@@ -4214,12 +4242,12 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
// Without this setb optimization, the outer SELECT_CC will be manually
// selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
- // transforms pseduo instruction to isel instruction. When there are more than
+ // transforms pseudo instruction to isel instruction. When there are more than
// one use for result like zext/sext, with current optimization we only see
// isel is replaced by setb but can't see any significant gain. Since
// setb has longer latency than original isel, we should avoid this. Another
// point is that setb requires comparison always kept, it can break the
- // oppotunity to get the comparison away if we have in future.
+ // opportunity to optimize the comparison away in the future.
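For context on the comment block above: the source idiom that setb (introduced with POWER9) captures is a three-way compare producing -1/0/1. Without the transform it selects to a compare feeding SELECT_CC_I4/SELECT_CC_I8 pseudos (later isel instructions); with it, a single compare plus setb suffices. An illustrative C++-level sketch only:

// setb materializes exactly this -1/0/1 result from one CR field,
// replacing the pair of isel instructions.
int threeWayCompare(long a, long b) {
  return a < b ? -1 : (a > b ? 1 : 0);
}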
if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) return false; @@ -4354,13 +4382,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (trySETCC(N)) return; break; - + // These nodes will be transformed into GETtlsADDR32 node, which + // later becomes BL_TLS __tls_get_addr(sym@tlsgd)@PLT + case PPCISD::ADDI_TLSLD_L_ADDR: + case PPCISD::ADDI_TLSGD_L_ADDR: { + const Module *Mod = MF->getFunction().getParent(); + if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || + !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || + Mod->getPICLevel() == PICLevel::SmallPIC) + break; + // Attach global base pointer on GETtlsADDR32 node in order to + // generate secure plt code for TLS symbols. + getGlobalBaseReg(); + } break; case PPCISD::CALL: { - const Module *M = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) || - !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC) + !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() || + !PPCSubTarget->isTargetELF()) break; SDValue Op = N->getOperand(1); @@ -5305,7 +5343,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { SDValue V = Queue.pop_back_val(); for (const SDValue &O : V.getNode()->ops()) { - unsigned b; + unsigned b = 0; uint64_t M = 0, A = 0; SDValue OLHS, ORHS; if (O.getOpcode() == ISD::OR) { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 39608cb74bee..24d50074860d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1,9 +1,8 @@ //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -45,6 +44,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -70,8 +70,10 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" @@ -111,6 +113,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); +static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", +cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); + static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision", cl::desc("enable quad precision float support on ppc"), cl::Hidden); @@ -119,6 +124,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls"); static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); +static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); + // FIXME: Remove this once the bug has been fixed! extern cl::opt<bool> ANDIGlueBug; @@ -550,7 +557,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); - setOperationAction(ISD::ABS, VT, Custom); + + // For v2i64, these are only valid with P8Vector. This is corrected after + // the loop. + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + + if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM, VT, Legal); + setOperationAction(ISD::FMINNUM, VT, Legal); + } // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { @@ -635,11 +653,28 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + if (!Subtarget.hasP8Vector()) { + setOperationAction(ISD::SMAX, MVT::v2i64, Expand); + setOperationAction(ISD::SMIN, MVT::v2i64, Expand); + setOperationAction(ISD::UMAX, MVT::v2i64, Expand); + setOperationAction(ISD::UMIN, MVT::v2i64, Expand); + } + + for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}) + setOperationAction(ISD::ABS, VT, Custom); // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle // with merges, splats, etc. setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); + // Vector truncates to sub-word integers that fit in an Altivec/VSX register + // are cheap, so handle them before they get expanded to scalar.
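As a concrete case of the cheap sub-word truncate noted above, truncating <8 x i16> to <8 x i8> only keeps the low byte of each lane, which one vector permute on the 16-byte register can do. An illustrative snippet using clang vector extensions (not part of the commit):

    typedef short v8i16 __attribute__((vector_size(16)));
    typedef signed char v8i8 __attribute__((vector_size(8)));

    // With the Custom TRUNCATE handling this becomes a single vperm-style
    // shuffle instead of eight scalar extract/truncate operations.
    v8i8 truncateLanes(v8i16 v) {
      return __builtin_convertvector(v, v8i8);
    }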
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); + setOperationAction(ISD::AND , MVT::v4i32, Legal); setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); @@ -804,6 +839,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v4f32, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal); if (Subtarget.hasDirectMove()) setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); @@ -866,6 +903,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FPOWI, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); } + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); } @@ -1060,6 +1098,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget.hasFPCVT()) @@ -1232,22 +1271,6 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, return Align; } -unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, - CallingConv::ID CC, - EVT VT) const { - if (Subtarget.hasSPE() && VT == MVT::f64) - return 2; - return PPCTargetLowering::getNumRegisters(Context, VT); -} - -MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, - CallingConv::ID CC, - EVT VT) const { - if (Subtarget.hasSPE() && VT == MVT::f64) - return MVT::i32; - return PPCTargetLowering::getRegisterType(Context, VT); -} - bool PPCTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } @@ -1256,6 +1279,10 @@ bool PPCTargetLowering::hasSPE() const { return Subtarget.hasSPE(); } +bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { + return VT.isScalarInteger(); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; @@ -1365,7 +1392,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; + case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; + case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; + case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; + case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH"; } return nullptr; } @@ -2202,16 +2233,43 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } + +/// SelectAddressEVXRegReg - Given the specified address, check to see if it can +/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base, + SDValue &Index, + SelectionDAG &DAG) const { + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) { + if (Memop->getMemoryVT() == MVT::f64) { + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; + } + } + } + return false; +} + /// SelectAddressRegReg - Given the specified address, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it -/// can be more efficiently represented with [r+imm]. +/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is +/// non-zero and N can be represented by a base register plus a signed 16-bit +/// displacement, make a more precise judgement by checking (displacement % \p +/// EncodingAlignment). bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, - SDValue &Index, - SelectionDAG &DAG) const { + SDValue &Index, SelectionDAG &DAG, + unsigned EncodingAlignment) const { int16_t imm = 0; if (N.getOpcode() == ISD::ADD) { - if (isIntS16Immediate(N.getOperand(1), imm)) - return false; // r+i + // Is there any SPE load/store (f64), which can't handle a 16-bit offset? + // SPE load/store can only handle 8-bit offsets. + if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG)) + return true; + if (isIntS16Immediate(N.getOperand(1), imm) && + (!EncodingAlignment || !(imm % EncodingAlignment))) + return false; // r+i if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i @@ -2219,8 +2277,9 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, Index = N.getOperand(1); return true; } else if (N.getOpcode() == ISD::OR) { - if (isIntS16Immediate(N.getOperand(1), imm)) - return false; // r+i can fold it if we can. + if (isIntS16Immediate(N.getOperand(1), imm) && + (!EncodingAlignment || !(imm % EncodingAlignment))) + return false; // r+i can fold it if we can. // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably @@ -2284,22 +2343,22 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better -/// represented as reg+reg. If \p Alignment is non-zero, only accept +/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept /// displacements that are multiples of that value. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, - unsigned Alignment) const { + unsigned EncodingAlignment) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail.
- if (SelectAddressRegReg(N, Disp, Base, DAG)) + if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment)) return false; if (N.getOpcode() == ISD::ADD) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Alignment || (imm % Alignment) == 0)) { + (!EncodingAlignment || (imm % EncodingAlignment) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -2323,7 +2382,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } else if (N.getOpcode() == ISD::OR) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Alignment || (imm % Alignment) == 0)) { + (!EncodingAlignment || (imm % EncodingAlignment) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. @@ -2349,7 +2408,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" int16_t Imm; - if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) { + if (isIntS16Immediate(CN, Imm) && + (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); @@ -2359,7 +2419,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && - (!Alignment || (CN->getZExtValue() % Alignment) == 0)) { + (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. @@ -2416,24 +2476,45 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, /// Returns true if we should use a direct load into vector instruction /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence. -static bool usePartialVectorLoads(SDNode *N) { - if (!N->hasOneUse()) - return false; +static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) { // If there are any other uses other than scalar to vector, then we should // keep it as a scalar load -> direct move pattern to prevent multiple - // loads. Currently, only check for i64 since we have lxsd/lfd to do this - // efficiently, but no update equivalent. - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - EVT MemVT = LD->getMemoryVT(); - if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) { - SDNode *User = *(LD->use_begin()); - if (User->getOpcode() == ISD::SCALAR_TO_VECTOR) - return true; - } + // loads. 
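The rewritten usePartialVectorLoads (continued just below) generalizes the old i64-only check: any scalar load whose sole user is a scalar_to_vector can be loaded directly into a vector register. An illustrative source-level trigger (assumes the Altivec/VSX vec_splats intrinsic from altivec.h):

    #include <altivec.h>

    // The i64 load feeds only a vector build, so lxsd can load straight into
    // a VSX register instead of ld + mtvsrd (GPR load plus direct move).
    __vector long long splatFromMemory(const long long *p) {
      return vec_splats(*p);
    }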
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N); + if (!LD) + return false; + + EVT MemVT = LD->getMemoryVT(); + if (!MemVT.isSimple()) + return false; + switch(MemVT.getSimpleVT().SimpleTy) { + case MVT::i64: + break; + case MVT::i32: + if (!ST.hasP8Vector()) + return false; + break; + case MVT::i16: + case MVT::i8: + if (!ST.hasP9Vector()) + return false; + break; + default: + return false; } - return false; + SDValue LoadedVal(N, 0); + if (!LoadedVal.hasOneUse()) + return false; + + for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); + UI != UE; ++UI) + if (UI.getUse().get().getResNo() == 0 && + UI->getOpcode() != ISD::SCALAR_TO_VECTOR) + return false; + + return true; } /// getPreIndexedAddressParts - returns true by value, base pointer and @@ -2464,7 +2545,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // Do not generate pre-inc forms for specific loads that feed scalar_to_vector // instructions because we can fold these into a more efficient instruction // instead, (such as LXSD). - if (isLoad && usePartialVectorLoads(N)) { + if (isLoad && usePartialVectorLoads(N, Subtarget)) { return false; } @@ -2745,7 +2826,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, const Module *M = DAG.getMachineFunction().getFunction().getParent(); PICLevel::Level picLevel = M->getPICLevel(); - TLSModel::Model Model = getTargetMachine().getTLSModel(GV); + const TargetMachine &TM = getTargetMachine(); + TLSModel::Model Model = TM.getTLSModel(GV); if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, @@ -2769,8 +2851,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); - } else - GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); + } else { + if (!TM.isPositionIndependent()) + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); + else if (picLevel == PICLevel::SmallPIC) + GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); + else + GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); + } SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); @@ -3147,101 +3235,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV, nextOffset)); } -#include "PPCGenCallingConv.inc" - -// Function whose sole purpose is to kill compiler warnings -// stemming from unused functions included from PPCGenCallingConv.inc. -CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { - return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; -} - -bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - return true; -} - -bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - static const MCPhysReg ArgRegs[] = { - PPC::R3, PPC::R4, PPC::R5, PPC::R6, - PPC::R7, PPC::R8, PPC::R9, PPC::R10, - }; - const unsigned NumArgRegs = array_lengthof(ArgRegs); - - unsigned RegNum = State.getFirstUnallocated(ArgRegs); - - // Skip one register if the first unallocated register has an even register - // number and there are still argument registers available which have not been - // allocated yet. 
RegNum is actually an index into ArgRegs, which means we - // need to skip a register if RegNum is odd. - if (RegNum != NumArgRegs && RegNum % 2 == 1) { - State.AllocateReg(ArgRegs[RegNum]); - } - - // Always return false here, as this function only makes sure that the first - // unallocated register has an odd register number and does not actually - // allocate a register for the current argument. - return false; -} - -bool -llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - static const MCPhysReg ArgRegs[] = { - PPC::R3, PPC::R4, PPC::R5, PPC::R6, - PPC::R7, PPC::R8, PPC::R9, PPC::R10, - }; - const unsigned NumArgRegs = array_lengthof(ArgRegs); - - unsigned RegNum = State.getFirstUnallocated(ArgRegs); - int RegsLeft = NumArgRegs - RegNum; - - // Skip if there is not enough registers left for long double type (4 gpr regs - // in soft float mode) and put long double argument on the stack. - if (RegNum != NumArgRegs && RegsLeft < 4) { - for (int i = 0; i < RegsLeft; i++) { - State.AllocateReg(ArgRegs[RegNum + i]); - } - } - - return false; -} - -bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - static const MCPhysReg ArgRegs[] = { - PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8 - }; - - const unsigned NumArgRegs = array_lengthof(ArgRegs); - - unsigned RegNum = State.getFirstUnallocated(ArgRegs); - - // If there is only one Floating-point register left we need to put both f64 - // values of a split ppc_fp128 value on the stack. - if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { - State.AllocateReg(ArgRegs[RegNum]); - } - - // Always return false here, as this function only makes sure that the two f64 - // values a ppc_fp128 value is split into are both passed in registers or both - // passed on the stack and does not actually allocate a register for the - // current argument. - return false; -} - /// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, @@ -3449,7 +3442,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrByteSize); - if (useSoftFloat() || hasSPE()) + if (useSoftFloat()) CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); @@ -3482,7 +3475,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else if (Subtarget.hasSPE()) - RC = &PPC::SPERCRegClass; + // SPE passes doubles in GPR pairs. + RC = &PPC::GPRCRegClass; else RC = &PPC::F8RCRegClass; break; @@ -3506,13 +3500,26 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( break; } - // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, - ValVT == MVT::i1 ? MVT::i32 : ValVT); - - if (ValVT == MVT::i1) - ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); + SDValue ArgValue; + // Transform the arguments stored in physical registers into + // virtual ones. 
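The hunk that follows materializes an SPE double from a pair of 32-bit GPRs. Conceptually (an illustrative example; the exact register pair is ABI-dependent):

    // On an e500/SPE target, a double argument arrives split across two GPRs
    // (high and low word). The lowering copies each half out of its register
    // and rebuilds the f64 with PPCISD::BUILD_SPE64, swapping the halves on
    // little-endian targets.
    double addOne(double d) { return d + 1.0; }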
+ if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) { + assert(i + 1 < e && "No second half of double precision argument"); + unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC); + unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC); + SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32); + SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32); + if (!Subtarget.isLittleEndian()) + std::swap (ArgValueLo, ArgValueHi); + ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo, + ArgValueHi); + } else { + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + ValVT == MVT::i1 ? MVT::i32 : ValVT); + if (ValVT == MVT::i1) + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); + } InVals.push_back(ArgValue); } else { @@ -4448,24 +4455,27 @@ static bool isFunctionGlobalAddress(SDValue Callee); static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM) { - // If !G, Callee can be an external symbol. - GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if (!G) - return false; - + // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols + // don't have enough information to determine if the caller and callee share + // the same TOC base, so we have to pessimistically assume they don't for + // correctness. + GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); + if (!G) + return false; + + const GlobalValue *GV = G->getGlobal(); // The medium and large code models are expected to provide a sufficiently // large TOC to provide all data addressing needs of a module with a // single TOC. Since each module will be addressed with a single TOC then we // only need to check that caller and callee don't cross dso boundaries. if (CodeModel::Medium == TM.getCodeModel() || CodeModel::Large == TM.getCodeModel()) - return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal()); + return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV); // Otherwise we need to ensure callee and caller are in the same section, // since the linker may allocate multiple TOCs, and we don't know which // sections will belong to the same TOC base. - const GlobalValue *GV = G->getGlobal(); if (!GV->isStrongDefinitionForLinker()) return false; @@ -4917,6 +4927,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); + bool isAIXABI = Subtarget.isAIXABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain @@ -4943,17 +4954,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress / + // TargetExternalSymbol node so that legalize doesn't hack it. if (isFunctionGlobalAddress(Callee)) { GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); + // A call to a TLS address is actually an indirect call to a // thread-specific pointer.
unsigned OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, Callee.getValueType(), 0, OpFlags); needIndirectCall = false; @@ -5095,17 +5107,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live - // into the call. - // We do need to reserve X2 to appease the verifier for the PATCHPOINT. - if (isSVR4ABI && isPPC64) { + // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register + // live into the call. + // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT. + if ((isSVR4ABI && isPPC64) || isAIXABI) { setUsesTOCBasePtr(DAG); - // We cannot add X2 as an operand here for PATCHPOINT, because there is no - // way to mark dependencies as implicit here. We will add the X2 dependency - // in EmitInstrWithCustomInserter. - if (!isPatchPoint) - Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); + // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is + // no way to mark dependencies as implicit here. + // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. + if (!isPatchPoint) + Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2 + : PPC::R2, PtrVT)); } return CallOpc; @@ -5129,10 +5142,27 @@ SDValue PPCTargetLowering::LowerCallResult( CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Val = DAG.getCopyFromReg(Chain, dl, - VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); + SDValue Val; + + if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { + SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + if (!Subtarget.isLittleEndian()) + std::swap (Lo, Hi); + Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi); + } else { + Val = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -5206,18 +5236,24 @@ SDValue PPCTargetLowering::FinishCall( } // Add a NOP immediately after the branch instruction when using the 64-bit - // SVR4 ABI. At link time, if caller and callee are in a different module and + // SVR4 or the AIX ABI. + // At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub // function which saves the current TOC, loads the TOC of the callee and // branches to the callee. The NOP will be replaced with a load instruction // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the - // same TOC), the NOP will remain unchanged. + // same TOC), the NOP will remain unchanged, or become some other NOP. 
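The NOP convention described above shows up in any call that might cross modules. A sketch of the emitted sequence (ELFv2 TOC save slot assumed; illustrative, not from this commit):

    extern int ext_fn(int);

    // Emitted roughly as:
    //   bl ext_fn
    //   nop          # linker may rewrite to: ld 2, 24(1)
    // so the caller's TOC pointer can be restored after a TOC-switching stub.
    int caller(int x) { return ext_fn(x) + 1; }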
MachineFunction &MF = DAG.getMachineFunction(); - if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && - !isPatchPoint) { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + if (!isTailCall && !isPatchPoint && + ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || + Subtarget.isAIXABI())) { if (CallOpc == PPCISD::BCTRL) { + if (Subtarget.isAIXABI()) + report_fatal_error("Indirect call on AIX is not implemented."); + // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function @@ -5229,7 +5265,6 @@ SDValue PPCTargetLowering::FinishCall( // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; - EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); @@ -5245,6 +5280,19 @@ SDValue PPCTargetLowering::FinishCall( } } + if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) { + // On AIX, direct function calls reference the symbol for the function's + // entry point, which is named by inserting a "." before the function's + // C-linkage name. + GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); + auto &Context = DAG.getMachineFunction().getMMI().getContext(); + MCSymbol *S = Context.getOrCreateSymbol(Twine(".") + + Twine(G->getGlobal()->getName())); + Callee = DAG.getMCSymbol(S, PtrVT); + // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode. + Ops[1] = Callee; + } + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); @@ -5314,16 +5362,20 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, !isTailCall) Callee = LowerGlobalAddress(Callee, DAG); - if (Subtarget.isSVR4ABI()) { - if (Subtarget.isPPC64()) - return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, isPatchPoint, Outs, OutVals, Ins, - dl, DAG, InVals, CS); - else - return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, isPatchPoint, Outs, OutVals, Ins, - dl, DAG, InVals, CS); - } + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) + return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, isPatchPoint, Outs, OutVals, Ins, + dl, DAG, InVals, CS); + + if (Subtarget.isSVR4ABI()) + return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, isPatchPoint, Outs, OutVals, Ins, + dl, DAG, InVals, CS); + + if (Subtarget.isAIXABI()) + return LowerCall_AIX(Chain, Callee, CallConv, isVarArg, + isTailCall, isPatchPoint, Outs, OutVals, Ins, + dl, DAG, InVals, CS); return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, @@ -5444,12 +5496,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( bool seenFloatArg = false; // Walk the register/memloc assignments, inserting copies/loads. 
- for (unsigned i = 0, j = 0, e = ArgLocs.size(); + // i - Tracks the index into the list of registers allocated for the call + // RealArgIdx - Tracks the index into the list of actual function arguments + // j - Tracks the index into the list of byval arguments + for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size(); i != e; - ++i) { + ++i, ++RealArgIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue Arg = OutVals[RealArgIdx]; + ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags; if (Flags.isByVal()) { // Argument is an aggregate which is passed by value, thus we need to @@ -5498,7 +5553,17 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( if (VA.isRegLoc()) { seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) { + bool IsLE = Subtarget.isLittleEndian(); + SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, + DAG.getIntPtrConstant(IsLE ? 0 : 1, dl)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0))); + SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, + DAG.getIntPtrConstant(IsLE ? 1 : 0, dl)); + RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(), + SVal.getValue(0))); + } else + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { // Put argument in the parameter list area of the current stack frame. assert(VA.isMemLoc()); @@ -6613,6 +6678,128 @@ SDValue PPCTargetLowering::LowerCall_Darwin( NumBytes, Ins, InVals, CS); } + +SDValue PPCTargetLowering::LowerCall_AIX( + SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite CS) const { + + assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && + "Unimplemented calling convention!"); + if (isVarArg || isPatchPoint) + report_fatal_error("This call type is unimplemented on AIX."); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + bool isPPC64 = PtrVT == MVT::i64; + unsigned PtrByteSize = isPPC64 ? 8 : 4; + unsigned NumOps = Outs.size(); + + + // Count how many bytes are to be pushed on the stack, including the linkage + // area, parameter list area. + // On XCOFF, we start with 24/48, which is reserved space for + // [SP][CR][LR][2 x reserved][TOC]. + unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if the callee + // is variadic. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + unsigned NumBytes = LinkageSize + 8 * PtrByteSize; + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog + // inserter pass. + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); + SDValue CallSeqStart = Chain; + + static const MCPhysReg GPR_32[] = { // 32-bit registers. 
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10 + }; + static const MCPhysReg GPR_64[] = { // 64-bit registers. + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10 + }; + + const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64) + : array_lengthof(GPR_32); + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; + unsigned GPR_idx = 0; + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + if (isTailCall) + report_fatal_error("Handling of tail call is unimplemented!"); + int SPDiff = 0; + + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // Promote integers if needed. + if (Arg.getValueType() == MVT::i1 || + (isPPC64 && Arg.getValueType() == MVT::i32)) { + unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg); + } + + // Note: "by value" is code for passing a structure by value, not + // basic types. + if (Flags.isByVal()) + report_fatal_error("Passing structure by value is unimplemented!"); + + switch (Arg.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + if (GPR_idx != NumGPRs) + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); + else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + break; + case MVT::f32: + case MVT::f64: + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: + case MVT::v1i128: + case MVT::f128: + case MVT::v4f64: + case MVT::v4i1: + report_fatal_error("Handling of this parameter type is unimplemented!"); + } + } + + if (!isFunctionGlobalAddress(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) + report_fatal_error("Handling of indirect call is unimplemented!"); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (auto Reg : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag); + InFlag = Chain.getValue(1); + } + + return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, + RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, + NumBytes, Ins, InVals, CS); +} + bool PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, @@ -6644,11 +6831,11 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Arg = OutVals[i]; + SDValue Arg = OutVals[RealResIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -6663,8 +6850,21 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; } - - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { + bool isLittleEndian = Subtarget.isLittleEndian(); + // Legalize ret f64 -> ret 2 x i32. 
+ SDValue SVal = + DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, + DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl)); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, + DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl)); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag); + } else + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } @@ -6890,6 +7090,61 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(0)); } +SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, + SelectionDAG &DAG) const { + + // Implements a vector truncate that fits in a vector register as a shuffle. + // We want to legalize vector truncates down to where the source fits in + // a vector register (and target is therefore smaller than vector register + // size). At that point legalization will try to custom lower the sub-legal + // result and get here - where we can contain the truncate as a single target + // operation. + + // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows: + // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2> + // + // We will implement it for big-endian ordering as this (where x denotes + // undefined): + // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to + // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u> + // + // The same operation in little-endian ordering will be: + // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to + // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1> + + assert(Op.getValueType().isVector() && "Vector type expected."); + + SDLoc DL(Op); + SDValue N1 = Op.getOperand(0); + unsigned SrcSize = N1.getValueType().getSizeInBits(); + assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector"); + SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + + EVT TrgVT = Op.getValueType(); + unsigned TrgNumElts = TrgVT.getVectorNumElements(); + EVT EltVT = TrgVT.getVectorElementType(); + unsigned WideNumElts = 128 / EltVT.getSizeInBits(); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); + + // First list the elements we want to keep. + unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); + SmallVector<int, 16> ShuffV; + if (Subtarget.isLittleEndian()) + for (unsigned i = 0; i < TrgNumElts; ++i) + ShuffV.push_back(i * SizeMult); + else + for (unsigned i = 1; i <= TrgNumElts; ++i) + ShuffV.push_back(i * SizeMult - 1); + + // Populate the remaining elements with undefs. + for (unsigned i = TrgNumElts; i < WideNumElts; ++i) + // ShuffV.push_back(i + WideNumElts); + ShuffV.push_back(WideNumElts + 1); + + SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc); + return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. 
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -9604,10 +9859,63 @@ SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { BifID = Intrinsic::ppc_altivec_vmaxsh; else if (VT == MVT::v16i8) BifID = Intrinsic::ppc_altivec_vmaxsb; - + return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT); } +// Custom lowering for fpext v2f32 to v2f64 +SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + + assert(Op.getOpcode() == ISD::FP_EXTEND && + "Should only be called for ISD::FP_EXTEND"); + + // We only want to custom lower an extend from v2f32 to v2f64. + if (Op.getValueType() != MVT::v2f64 || + Op.getOperand(0).getValueType() != MVT::v2f32) + return SDValue(); + + SDLoc dl(Op); + SDValue Op0 = Op.getOperand(0); + + switch (Op0.getOpcode()) { + default: + return SDValue(); + case ISD::FADD: + case ISD::FMUL: + case ISD::FSUB: { + SDValue NewLoad[2]; + for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) { + // Ensure both inputs are loads. + SDValue LdOp = Op0.getOperand(i); + if (LdOp.getOpcode() != ISD::LOAD) + return SDValue(); + // Generate new load node. + LoadSDNode *LD = cast<LoadSDNode>(LdOp); + SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; + NewLoad[i] = + DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, + DAG.getVTList(MVT::v4f32, MVT::Other), + LoadOps, LD->getMemoryVT(), + LD->getMemOperand()); + } + SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, + NewLoad[0], NewLoad[1], + Op0.getNode()->getFlags()); + return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp); + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op0); + SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; + SDValue NewLd = + DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, + DAG.getVTList(MVT::v4f32, MVT::Other), + LoadOps, LD->getMemoryVT(), LD->getMemOperand()); + return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd); + } + } + llvm_unreachable("ERROR: Should return for all cases within switch."); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9661,6 +9969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::ABS: return LowerABS(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); @@ -9701,7 +10010,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_W_CHAIN: { if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != - Intrinsic::ppc_is_decremented_ctr_nonzero) + Intrinsic::loop_decrement) break; assert(N->getValueType(0) == MVT::i1 && @@ -9737,6 +10046,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; + case ISD::TRUNCATE: { + EVT TrgVT = N->getValueType(0); + if (TrgVT.isVector() && + isOperationCustom(N->getOpcode(), TrgVT) && + N->getOperand(0).getValueType().getSizeInBits() <= 128) + Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); + return; + } case ISD::BITCAST: // Don't handle bitcast here.
return; @@ -9822,10 +10139,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned incr = MI.getOperand(3).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -9841,7 +10158,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned TmpReg = (!BinOpcode) ? incr : + Register TmpReg = (!BinOpcode) ? incr : RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); @@ -9949,20 +10266,20 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned PtrReg = RegInfo.createVirtualRegister(RC); - unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); - unsigned ShiftReg = + Register PtrReg = RegInfo.createVirtualRegister(RC); + Register Shift1Reg = RegInfo.createVirtualRegister(GPRC); + Register ShiftReg = isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC); - unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); - unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); - unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); - unsigned Ptr1Reg; - unsigned TmpReg = + Register Incr2Reg = RegInfo.createVirtualRegister(GPRC); + Register MaskReg = RegInfo.createVirtualRegister(GPRC); + Register Mask2Reg = RegInfo.createVirtualRegister(GPRC); + Register Mask3Reg = RegInfo.createVirtualRegister(GPRC); + Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC); + Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC); + Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC); + Register TmpDestReg = RegInfo.createVirtualRegister(GPRC); + Register Ptr1Reg; + Register TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC); // thisMBB: @@ -10764,23 +11081,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned PtrReg = RegInfo.createVirtualRegister(RC); - unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); - unsigned ShiftReg = + Register PtrReg = RegInfo.createVirtualRegister(RC); + Register Shift1Reg = RegInfo.createVirtualRegister(GPRC); + Register ShiftReg = isLittleEndian ? 
Shift1Reg : RegInfo.createVirtualRegister(GPRC); - unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC); - unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC); - unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); - unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); - unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); - unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); - unsigned Ptr1Reg; - unsigned TmpReg = RegInfo.createVirtualRegister(GPRC); - unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; + Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC); + Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC); + Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC); + Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC); + Register MaskReg = RegInfo.createVirtualRegister(GPRC); + Register Mask2Reg = RegInfo.createVirtualRegister(GPRC); + Register Mask3Reg = RegInfo.createVirtualRegister(GPRC); + Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC); + Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC); + Register TmpDestReg = RegInfo.createVirtualRegister(GPRC); + Register Ptr1Reg; + Register TmpReg = RegInfo.createVirtualRegister(GPRC); + Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB @@ -10968,7 +11285,147 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); - return BB; + BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), + MI.getOperand(0).getReg()) + .addReg(CRReg); + } else if (MI.getOpcode() == PPC::TBEGIN_RET) { + DebugLoc Dl = MI.getDebugLoc(); + unsigned Imm = MI.getOperand(1).getImm(); + BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm); + BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), + MI.getOperand(0).getReg()) + .addReg(PPC::CR0EQ); + } else if (MI.getOpcode() == PPC::SETRNDi) { + DebugLoc dl = MI.getDebugLoc(); + unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); + + // The floating point rounding mode is in the bits 62:63 of FPSCR, and has + // the following settings: + // 00 Round to nearest + // 01 Round to 0 + // 10 Round to +inf + // 11 Round to -inf + + // When the operand is an immediate, use the two least significant bits of + // the immediate to set the bits 62:63 of FPSCR. + unsigned Mode = MI.getOperand(1).getImm(); + BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0)) + .addImm(31); + + BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0)) + .addImm(30); + } else if (MI.getOpcode() == PPC::SETRND) { + DebugLoc dl = MI.getDebugLoc(); + + // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg + // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg. + // If the target doesn't have DirectMove, we should use stack to do the + // conversion, because the target doesn't have the instructions like mtvsrd + // or mfvsrd to do this conversion directly.
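Without direct moves, the lambda defined next bounces the value between register files through a stack slot. The same round trip in plain C++ (illustrative; std::memcpy stands in for the STD/STFD store and LFD/LD reload):

    #include <cstdint>
    #include <cstring>

    // Store from one register file, reload into the other: this is what the
    // stack-object path below does when mtvsrd/mfvsrd are unavailable.
    double gprBitsToFpr(uint64_t bits) {
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }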
+ auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) { + if (Subtarget.hasDirectMove()) { + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg) + .addReg(SrcReg); + } else { + // Use stack to do the register copy. + unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD; + MachineRegisterInfo &RegInfo = F->getRegInfo(); + const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg); + if (RC == &PPC::F8RCRegClass) { + // Copy register from F8RCRegClass to G8RCRegClass. + assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && + "Unsupported RegClass."); + + StoreOp = PPC::STFD; + LoadOp = PPC::LD; + } else { + // Copy register from G8RCRegClass to F8RCRegClass. + assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && + (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && + "Unsupported RegClass."); + } + + MachineFrameInfo &MFI = F->getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(8, 8, false); + + MachineMemOperand *MMOStore = F->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + + // Store the SrcReg into the stack. + BuildMI(*BB, MI, dl, TII->get(StoreOp)) + .addReg(SrcReg) + .addImm(0) + .addFrameIndex(FrameIdx) + .addMemOperand(MMOStore); + + MachineMemOperand *MMOLoad = F->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + + // Load from the stack where SrcReg is stored, and save to DestReg, + // so we have done the RegClass conversion from RegClass::SrcReg to + // RegClass::DestReg. + BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg) + .addImm(0) + .addFrameIndex(FrameIdx) + .addMemOperand(MMOLoad); + } + }; + + unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); + + // When the operand is a gprc register, use the two least significant bits + // of the register and the mtfsf instruction to set the bits 62:63 of FPSCR. + // + // copy OldFPSCRTmpReg, OldFPSCRReg + // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1) + // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62 + // copy NewFPSCRReg, NewFPSCRTmpReg + // mtfsf 255, NewFPSCRReg + MachineOperand SrcOp = MI.getOperand(1); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + + copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); + + unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + + // The first operand of INSERT_SUBREG should be a register which has + // subregisters, we only care about its RegClass, so we should use an + // IMPLICIT_DEF register. + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg); + BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg) + .addReg(ImDefReg) + .add(SrcOp) + .addImm(1); + + unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) + .addReg(OldFPSCRTmpReg) + .addReg(ExtSrcReg) + .addImm(0) + .addImm(62); + + unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); + + // The mask 255 means that the 32:63 bits of NewFPSCRReg are put into the + // 32:63 bits of FPSCR.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)) + .addImm(255) + .addReg(NewFPSCRReg) + .addImm(0) + .addImm(0); } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -11006,7 +11463,9 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); - UseOneConstNR = true; + // The Newton-Raphson computation with a single constant does not provide + // enough accuracy on some CPUs. + UseOneConstNR = !Subtarget.needsTwoConstNR(); return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); @@ -12062,9 +12521,14 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { "Should be called with a BUILD_VECTOR node"); SDLoc dl(N); + + // Return early for non byte-sized types, as they can't be consecutive. + if (!N->getValueType(0).getVectorElementType().isByteSized()) + return SDValue(); + bool InputsAreConsecutiveLoads = true; bool InputsAreReverseConsecutive = true; - unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8; + unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize(); SDValue FirstInput = N->getOperand(0); bool IsRoundOfExtLoad = false; @@ -12332,9 +12796,8 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); - if (Ext1.getValueType() != MVT::i32 || - Ext2.getValueType() != MVT::i32) - if (Ext1.getOperand(0) != Ext2.getOperand(0)) + if (Ext1.getOperand(0).getValueType() != MVT::v4i32 || + Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); @@ -12664,6 +13127,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return combineSRA(N, DCI); case ISD::SRL: return combineSRL(N, DCI); + case ISD::MUL: + return combineMUL(N, DCI); case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); @@ -13246,7 +13711,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == - Intrinsic::ppc_is_decremented_ctr_nonzero) { + Intrinsic::loop_decrement) { // We now need to make the intrinsic dead (it cannot be instruction // selected).
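The Intrinsic::loop_decrement rename above touches the CTR hardware-loop combines. For reference, the kind of loop the intrinsic models (an illustrative example, not from this commit):

    // With CTR loops enabled, the backend replaces the induction-variable
    // test with a loop_decrement that selects to mtctr once before the loop
    // and bdnz on each back edge.
    void scaleAll(float *x, int n) {
      for (int i = 0; i < n; ++i)
        x[i] *= 2.0f;
    }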
@@ -13272,14 +13737,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::AND && LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == - Intrinsic::ppc_is_decremented_ctr_nonzero && + Intrinsic::loop_decrement && isa<ConstantSDNode>(LHS.getOperand(1)) && !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == - Intrinsic::ppc_is_decremented_ctr_nonzero && + Intrinsic::loop_decrement && isa<ConstantSDNode>(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); @@ -13355,9 +13820,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); - case ISD::ABS: + case ISD::ABS: return combineABS(N, DCI); - case ISD::VSELECT: + case ISD::VSELECT: return combineVSelect(N, DCI); } @@ -13453,6 +13918,15 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { if (!ML) break; + if (!DisableInnermostLoopAlign32) { + // If the nested loop is an innermost loop, prefer a 32-byte alignment, + // so that we can decrease cache misses and branch-prediction misses. + // Actual alignment of the loop will depend on the hotness check and other + // logic in alignBlocks. + if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) + return 5; + } + const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // For small loops (between 5 and 8 instructions), align to a 32-byte @@ -13502,7 +13976,7 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const { return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "ws" || - Constraint == "wi") { + Constraint == "wi" || Constraint == "ww") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); @@ -13530,10 +14004,12 @@ PPCTargetLowering::getSingleConstraintMatchWeight( StringRef(constraint) == "wf") && type->isVectorTy()) return CW_Register; - else if (StringRef(constraint) == "ws" && type->isDoubleTy()) - return CW_Register; else if (StringRef(constraint) == "wi" && type->isIntegerTy(64)) return CW_Register; // just holds 64-bit integer data. + else if (StringRef(constraint) == "ws" && type->isDoubleTy()) + return CW_Register; + else if (StringRef(constraint) == "ww" && type->isFloatTy()) + return CW_Register; switch (*constraint) { default: @@ -13619,7 +14095,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Constraint == "wf" || Constraint == "wi") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); - } else if (Constraint == "ws" && Subtarget.hasVSX()) { + } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else @@ -13865,7 +14341,7 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { if (CModel == CodeModel::Small || CModel == CodeModel::Large) return true; - // JumpTable and BlockAddress are accessed as got-indirect. + // JumpTable and BlockAddress are accessed as got-indirect. if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA)) return true; @@ -14082,18 +14558,16 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic /// target-independent logic. -EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, - bool MemcpyStrSrc, - MachineFunction &MF) const { +EVT PPCTargetLowering::getOptimalMemOpType( + uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, + bool ZeroMemset, bool MemcpyStrSrc, + const AttributeList &FuncAttributes) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { - const Function &F = MF.getFunction(); // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && - !F.hasFnAttribute(Attribute::NoImplicitFloat)) { + !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } @@ -14178,6 +14652,7 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, + MachineMemOperand::Flags, bool *Fast) const { if (DisablePPCUnaligned) return false; @@ -14324,7 +14799,7 @@ void PPCTargetLowering::insertCopiesSplitCSR( BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); - // Insert the copy-back instructions right before the terminator + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) @@ -14345,7 +14820,8 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const { return TargetLowering::insertSSPDeclarations(M); } -bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { +bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { if (!VT.isSimple() || !Subtarget.hasVSX()) return false; @@ -14585,6 +15061,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N, return SDValue(); } +SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1)); + if (!ConstOpOrElement) + return SDValue(); + + // An imul is usually smaller than the alternative sequence for legal types. + if (DAG.getMachineFunction().getFunction().hasMinSize() && + isOperationLegal(ISD::MUL, N->getValueType(0))) + return SDValue(); + + auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool { + switch (this->Subtarget.getDarwinDirective()) { + default: + // TODO: enhance the condition for subtargets before pwr8 + return false; + case PPC::DIR_PWR8: + // type mul add shl + // scalar 4 1 1 + // vector 7 2 2 + return true; + case PPC::DIR_PWR9: + // type mul add shl + // scalar 5 2 2 + // vector 7 2 2 + + // The cycle ratios of the relevant operations are shown in the table + // above. Because mul costs 5 (scalar) / 7 (vector) cycles while + // add/sub/shl each cost 2 for both scalar and vector types, the + // two-instruction patterns (add/sub + shl, 4 cycles) are always + // profitable; but the three-instruction pattern + // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 cycles + // (sub + add + shl), so we should only do it for vector types. + return IsAddOne && IsNeg ? 
VT.isVector() : true; + } + }; + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + const APInt &MulAmt = ConstOpOrElement->getAPIntValue(); + bool IsNeg = MulAmt.isNegative(); + APInt MulAmtAbs = MulAmt.abs(); + + if ((MulAmtAbs - 1).isPowerOf2()) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + // (mul x, -(2^N + 1)) => -(add (shl x, N), x) + + if (!IsProfitable(IsNeg, true, VT)) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT)); + SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); + + if (!IsNeg) + return Res; + + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res); + } else if ((MulAmtAbs + 1).isPowerOf2()) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + + if (!IsProfitable(IsNeg, false, VT)) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT)); + + if (!IsNeg) + return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0); + else + return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); + + } else { + return SDValue(); + } +} + bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64-bit SysV ABIs. if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 30acd60eba6f..97422c6eda36 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1,9 +1,8 @@ //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,7 +14,6 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H -#include "PPC.h" #include "PPCInstrInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -41,7 +39,7 @@ namespace llvm { // the enum. The order of elements in this enum matters! // Values that are added after this entry: // STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE - // are considerd memory opcodes and are treated differently than entries + // are considered memory opcodes and are treated differently than entries // that come before it. For example, ADD or MUL should be placed before // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come // after it. @@ -161,7 +159,7 @@ namespace llvm { /// CALL - A direct function call. /// CALL_NOP is a call with the special NOP which follows 64-bit - /// SVR4 calls. + /// SVR4 calls and 32-bit/64-bit AIX calls. CALL, CALL_NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a @@ -193,9 +191,18 @@ namespace llvm { /// Direct move from a GPR to a VSX register (zero) MTVSRZ, - /// Direct move of 2 consective GPR to a VSX register. + /// Direct move of 2 consecutive GPR to a VSX register. 
BUILD_FP128, + /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and + /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is + /// unsupported for this target. + /// Merge 2 GPRs to a single SPE register. + BUILD_SPE64, + + /// Extract SPE register component, second argument is high or low. + EXTRACT_SPE, + /// Extract a subvector from signed integer vector and convert to FP. /// It is primarily used to convert a (widened) illegal integer vector /// type to a legal floating point vector type. @@ -265,11 +272,11 @@ namespace llvm { CR6UNSET, /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS - /// on PPC32. + /// for non-position independent code on PPC32. PPC32_GOT, /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and - /// local dynamic TLS on PPC32. + /// local dynamic TLS and position independent code on PPC32. PPC32_PICGOT, /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec @@ -405,6 +412,9 @@ namespace llvm { /// representation. QBFLT, + /// Custom extend v4f32 to v2f64. + FP_EXTEND_LH, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -446,6 +456,10 @@ namespace llvm { /// an xxswapd. LXVD2X, + /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a + /// v2f32 value into the lower half of a VSR register. + LD_VSX_LH, + /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd. @@ -620,6 +634,8 @@ namespace llvm { return true; } + bool preferIncOfAddToSubOfNot(EVT VT) const override; + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } @@ -653,18 +669,27 @@ namespace llvm { ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; + /// SelectAddressEVXRegReg - Given the specified address, check to see if + /// it can be more efficiently represented as [r+imm]. + bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + /// SelectAddressRegReg - Given the specified address, check to see if it - /// can be represented as an indexed [r+r] operation. Returns false if it - /// can be more efficiently represented with [r+imm]. + /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment + /// is non-zero, only accept displacements which are not suitable for [r+imm]. + /// Returns false if it can be represented by [r+imm], which is preferred. bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG) const; + SelectionDAG &DAG, + unsigned EncodingAlignment = 0) const; /// SelectAddressRegImm - Returns true if the address N can be represented /// by a base register plus a signed 16-bit displacement [r+imm], and if it - /// is not better represented as reg+reg. If Aligned is true, only accept - /// displacements suitable for STD and friends, i.e. multiples of 4. + /// is not better represented as reg+reg. If \p EncodingAlignment is + /// non-zero, only accept displacements suitable for instruction encoding + /// requirements, i.e. multiples of 4 for DS form. 
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG, unsigned Alignment) const; + SelectionDAG &DAG, + unsigned EncodingAlignment) const; /// SelectAddressRegRegOnly - Given the specified address, force it to be /// represented as an indexed [r+r] operation. @@ -833,14 +858,14 @@ namespace llvm { EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const override; + const AttributeList &FuncAttributes) const override; /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. - bool allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align = 1, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be @@ -888,7 +913,8 @@ namespace llvm { bool useLoadStackGuardNode() const override; void insertSSPDeclarations(Module &M) const override; - bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; unsigned getJumpTableEncoding() const override; bool isJumpTableRelative() const override; @@ -898,14 +924,6 @@ namespace llvm { unsigned JTI, MCContext &Ctx) const override; - unsigned getNumRegistersForCallingConv(LLVMContext &Context, - CallingConv::ID CC, - EVT VT) const override; - - MVT getRegisterTypeForCallingConv(LLVMContext &Context, - CallingConv::ID CC, - EVT VT) const override; - private: struct ReuseLoadInfo { SDValue Ptr; @@ -953,6 +971,8 @@ namespace llvm { SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const; + SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const; + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; @@ -1019,6 +1039,7 @@ namespace llvm { SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; @@ -1106,6 +1127,15 @@ namespace llvm { const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const; + SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite CS) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; @@ -1119,6 +1149,7 @@ namespace llvm { SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineADD(SDNode *N, DAGCombinerInfo 
&DCI) const; SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; @@ -1137,8 +1168,6 @@ namespace llvm { int &RefinementSteps) const override; unsigned combineRepeatedFPDivisors() const override; - CCAssignFn *useFastISelCCs(unsigned Flag) const; - SDValue combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; @@ -1169,30 +1198,6 @@ namespace llvm { } // end namespace PPC - bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - - bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - - bool - CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - - bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - bool isIntS16Immediate(SDNode *N, int16_t &Imm); bool isIntS16Immediate(SDValue Op, int16_t &Imm); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 2ce6ad3293eb..d598567f8e4e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1,9 +1,8 @@ //===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -168,7 +167,7 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs), (ins memrix:$src), "bctrl\n\tld 2, $src", IIC_BrB, - [(PPCbctrl_load_toc ixaddr:$src)]>, + [(PPCbctrl_load_toc iaddrX4:$src)]>, Requires<[In64BitMode]>; } @@ -193,6 +192,12 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)), def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; +// Calls for AIX +def : Pat<(PPCcall (i64 mcsym:$dst)), + (BL8 mcsym:$dst)>; +def : Pat<(PPCcall_nop (i64 mcsym:$dst)), + (BL8_NOP mcsym:$dst)>; + // Atomic operations // FIXME: some of these might be used with constant operands. This will result // in constant materialization instructions that may be redundant. 
We currently @@ -383,7 +388,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } let hasSideEffects = 1, Defs = [CTR8] in { -let Pattern = [(int_ppc_mtctr i64:$rS)] in +let Pattern = [(int_set_loop_iterations i64:$rS)] in def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -720,10 +725,17 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; -defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH), - "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, - [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>, - isPPC64, Requires<[IsISA3_0]>; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA), + (ins gprc:$rS, u6imm:$SH), + "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, + [(set i64:$rA, + (PPCextswsli i32:$rS, (i32 imm:$SH)))]>, + isPPC64, Requires<[IsISA3_0]>; + +defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), + "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, + []>, isPPC64, Requires<[IsISA3_0]>; // For fast-isel: let isCodeGenOnly = 1, Defs = [CARRY] in @@ -773,13 +785,21 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), let Predicates = [IsISA3_0] in { def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), "maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; -def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), +def MADDHDU : VAForm_1a<49, + (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), "maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; -def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), - "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; +def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC), + "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, + [(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>, + isPPC64; def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA), "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def MADDLD8 : VAForm_1a<51, + (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), + "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, + [(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>, + isPPC64; def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA), "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; } @@ -911,7 +931,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, - (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, + (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64, PPC970_DGroup_Cracked; let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src), @@ -920,7 +940,7 @@ def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src), PPC970_DGroup_Cracked; def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", IIC_LdStLHA, - [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: let isCodeGenOnly = 1, mayLoad = 1 in { @@ -1022,7 +1042,7 @@ def 
LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), let PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, - [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; + [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64; // The following four definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). @@ -1045,7 +1065,7 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", IIC_LdStLD, - [(set i64:$rD, (load xaddr:$src))]>, isPPC64; + [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64; def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src), "ldbrx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; @@ -1214,10 +1234,10 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", IIC_LdStSTD, - [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64; def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", IIC_LdStSTD, - [(store i64:$rS, xaddr:$dst)]>, isPPC64, + [(store i64:$rS, xaddrX4:$dst)]>, isPPC64, PPC970_DGroup_Cracked; def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), "stdbrx $rS, $dst", IIC_LdStStore, @@ -1433,11 +1453,11 @@ def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), (STDX $rS, xoaddr:$dst)>; // 64-bits atomic loads and stores -def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>; -def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>; +def : Pat<(atomic_load_64 iaddrX4:$src), (LD memrix:$src)>; +def : Pat<(atomic_load_64 xaddrX4:$src), (LDX memrr:$src)>; -def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>; -def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; +def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>; +def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; let Predicates = [IsISA3_0] in { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 69b19e45c3e9..8176c5120a83 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1,9 +1,8 @@ //===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -822,7 +821,9 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. 
$vD, $vA, $vB", v4i32>; -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isReMaterializable = 1 in { + def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllZerosV))]>; @@ -899,6 +900,32 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>; def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>; def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>; +// Max/Min +def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)), + (v16i8 (VMAXUB $src1, $src2))>; +def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)), + (v16i8 (VMAXSB $src1, $src2))>; +def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)), + (v8i16 (VMAXUH $src1, $src2))>; +def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)), + (v8i16 (VMAXSH $src1, $src2))>; +def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)), + (v4i32 (VMAXUW $src1, $src2))>; +def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)), + (v4i32 (VMAXSW $src1, $src2))>; +def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)), + (v16i8 (VMINUB $src1, $src2))>; +def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)), + (v16i8 (VMINSB $src1, $src2))>; +def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)), + (v8i16 (VMINUH $src1, $src2))>; +def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)), + (v8i16 (VMINSH $src1, $src2))>; +def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)), + (v4i32 (VMINUW $src1, $src2))>; +def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)), + (v4i32 (VMINSW $src1, $src2))>; + // Shuffles. // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h index cf71b1c59869..323f7e39adf7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h @@ -1,9 +1,8 @@ //===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 2fe765dd99e1..a48eb1690695 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -1,9 +1,8 @@ //===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -38,14 +37,6 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; - /// Indicate that the VSX instruction is to use VSX numbering/encoding. 
- /// Since ISA 3.0, there are scalar instructions that use the upper - /// half of the VSX register set only. Rather than adding further complexity - /// to the register class set, the VSX registers just include the Altivec - /// registers and this flag decides the numbering to be used for them. - bits<1> UseVSXReg = 0; - let TSFlags{6} = UseVSXReg; - // Indicate that this instruction is of type X-Form Load or Store bits<1> XFormMemOp = 0; let TSFlags{7} = XFormMemOp; @@ -74,7 +65,6 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } -class UseVSXReg { bits<1> UseVSXReg = 1; } class XFormMemOp { bits<1> XFormMemOp = 1; } // Two joined instructions; used to emit two adjacent instructions as one. @@ -730,6 +720,7 @@ class XForm_25_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> { } +// [PO RT /// RB XO RC] class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { @@ -1193,9 +1184,9 @@ class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2, let Inst{11-15} = DCMX{4-0}; let Inst{16-20} = XB{4-0}; let Inst{21-24} = xo1; - let Inst{25} = DCMX{5}; + let Inst{25} = DCMX{6}; let Inst{26-28} = xo2; - let Inst{29} = DCMX{6}; + let Inst{29} = DCMX{5}; let Inst{30} = XB{5}; let Inst{31} = XT{5}; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td index 0efe797c765d..104b57a70a2e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td @@ -1,9 +1,8 @@ //===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -21,55 +20,53 @@ def HTM_get_imm : SDNodeXForm<imm, [{ }]>; let hasSideEffects = 1 in { -def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>; +def TCHECK_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins), "#TCHECK_RET", []>; +def TBEGIN_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins u1imm:$R), "#TBEGIN_RET", []>; } let Predicates = [HasHTM] in { +let Defs = [CR0] in { def TBEGIN : XForm_htm0 <31, 654, - (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>; + (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>; def TEND : XForm_htm1 <31, 686, - (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>; + (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>; def TABORT : XForm_base_r3xo <31, 910, - (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR, + (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR, []>, isDOT { let RST = 0; let B = 0; } def TABORTWC : XForm_base_r3xo <31, 782, - (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B), "tabortwc. 
$RTS, $A, $B", IIC_SprMTSPR, []>, isDOT; def TABORTWCI : XForm_base_r3xo <31, 846, - (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B), "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>, isDOT; def TABORTDC : XForm_base_r3xo <31, 814, - (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B), "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>, isDOT; def TABORTDCI : XForm_base_r3xo <31, 878, - (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B), "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>, isDOT; def TSR : XForm_htm2 <31, 750, - (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>, + (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>, isDOT; -def TCHECK : XForm_htm3 <31, 718, - (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>; - - def TRECLAIM : XForm_base_r3xo <31, 942, - (outs crrc:$ret), (ins gprc:$A), "treclaim. $A", + (outs), (ins gprc:$A), "treclaim. $A", IIC_SprMTSPR, []>, isDOT { let RST = 0; @@ -77,13 +74,17 @@ def TRECLAIM : XForm_base_r3xo <31, 942, } def TRECHKPT : XForm_base_r3xo <31, 1006, - (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>, + (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>, isDOT { let RST = 0; let A = 0; let B = 0; } +} + +def TCHECK : XForm_htm3 <31, 718, + (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>; // Builtins // All HTM instructions, with the exception of tcheck, set CR0 with the @@ -94,15 +95,11 @@ def TRECHKPT : XForm_base_r3xo <31, 1006, // tbegin builtin API which defines a return value of 1 as success. def : Pat<(int_ppc_tbegin i32:$R), - (XORI - (EXTRACT_SUBREG ( - TBEGIN (HTM_get_imm imm:$R)), sub_eq), - 1)>; + (XORI (TBEGIN_RET(HTM_get_imm imm:$R)), 1)>; def : Pat<(int_ppc_tend i32:$R), (TEND (HTM_get_imm imm:$R))>; - def : Pat<(int_ppc_tabort i32:$R), (TABORT $R)>; @@ -167,6 +164,8 @@ def : Pat<(int_ppc_tsuspend), (TSR 0)>; def : Pat<(i64 (int_ppc_ttest)), - (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>; + (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (TABORTWCI 0, (LI 0), 0), sub_32)), + 36, 28)>; } // [HasHTM] diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index d754ce2990d2..a787bdd56b9d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1,9 +1,8 @@ //===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -333,6 +332,17 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case PPC::ADDIStocHA: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: + case PPC::XXLXORz: + case PPC::XXLXORspz: + case PPC::XXLXORdpz: + case PPC::V_SET0B: + case PPC::V_SET0H: + case PPC::V_SET0: + case PPC::V_SETALLONESB: + case PPC::V_SETALLONESH: + case PPC::V_SETALLONES: + case PPC::CRSET: + case PPC::CRUNSET: return true; } return false; @@ -381,9 +391,9 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, // Swap op1/op2 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) && "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo."); - unsigned Reg0 = MI.getOperand(0).getReg(); - unsigned Reg1 = MI.getOperand(1).getReg(); - unsigned Reg2 = MI.getOperand(2).getReg(); + Register Reg0 = MI.getOperand(0).getReg(); + Register Reg1 = MI.getOperand(1).getReg(); + Register Reg2 = MI.getOperand(2).getReg(); unsigned SubReg1 = MI.getOperand(1).getSubReg(); unsigned SubReg2 = MI.getOperand(2).getSubReg(); bool Reg1IsKill = MI.getOperand(1).isKill(); @@ -411,7 +421,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, if (NewMI) { // Create a new instruction. - unsigned Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg(); + Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg(); bool Reg0IsDead = MI.getOperand(0).isDead(); return BuildMI(MF, MI.getDebugLoc(), MI.getDesc()) .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) @@ -942,12 +952,16 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } else if (PPC::G8RCRegClass.contains(SrcReg) && PPC::VSFRCRegClass.contains(DestReg)) { + assert(Subtarget.hasDirectMove() && + "Subtarget doesn't support directmove, don't know how to copy."); BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg); NumGPRtoVSRSpill++; getKillRegState(KillSrc); return; } else if (PPC::VSFRCRegClass.contains(SrcReg) && PPC::G8RCRegClass.contains(DestReg)) { + assert(Subtarget.hasDirectMove() && + "Subtarget doesn't support directmove, don't know how to copy."); BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; @@ -963,7 +977,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; @@ -996,6 +1009,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; + else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; else @@ -1066,6 +1081,10 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float8Spill; } else if (PPC::F4RCRegClass.contains(Reg)) { OpcodeIndex = SOK_Float4Spill; + } else if (PPC::SPERCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPESpill; + } else if (PPC::SPE4RCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1152,6 +1171,10 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float8Spill; } else if (PPC::F4RCRegClass.contains(Reg)) { OpcodeIndex = SOK_Float4Spill; + } else if 
(PPC::SPERCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPESpill; + } else if (PPC::SPE4RCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1632,6 +1655,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD) return false; + const TargetRegisterInfo *TRI = &getRegisterInfo(); // The record forms set the condition register based on a signed comparison // with zero (so says the ISA manual). This is not as straightforward as it // seems, however, because this is always a 64-bit comparison on PPC64, even @@ -1645,6 +1669,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; + // Look through copies unless that gets us to a physical register. + unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); + if (TargetRegisterInfo::isVirtualRegister(ActualSrc)) + SrcReg = ActualSrc; + // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; @@ -1745,7 +1774,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, return false; PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); - PPC::Predicate NewPred = Pred; unsigned PredCond = PPC::getPredicateCondition(Pred); unsigned PredHint = PPC::getPredicateHint(Pred); int16_t Immed = (int16_t)Value; @@ -1755,25 +1783,23 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (Immed == -1 && PredCond == PPC::PRED_GT) // We convert "greater than -1" into "greater than or equal to 0", // since we are assuming signed comparison by !equalityOnly - NewPred = PPC::getPredicate(PPC::PRED_GE, PredHint); + Pred = PPC::getPredicate(PPC::PRED_GE, PredHint); else if (Immed == -1 && PredCond == PPC::PRED_LE) // We convert "less than or equal to -1" into "less than 0". - NewPred = PPC::getPredicate(PPC::PRED_LT, PredHint); + Pred = PPC::getPredicate(PPC::PRED_LT, PredHint); else if (Immed == 1 && PredCond == PPC::PRED_LT) // We convert "less than 1" into "less than or equal to 0". - NewPred = PPC::getPredicate(PPC::PRED_LE, PredHint); + Pred = PPC::getPredicate(PPC::PRED_LE, PredHint); else if (Immed == 1 && PredCond == PPC::PRED_GE) // We convert "greater than or equal to 1" into "greater than 0". - NewPred = PPC::getPredicate(PPC::PRED_GT, PredHint); + Pred = PPC::getPredicate(PPC::PRED_GT, PredHint); else return false; - PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), - NewPred)); + PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred)); } // Search for Sub. - const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; // Get ready to iterate backward from CmpInstr. 
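The four predicate rewrites above all normalize a signed compare against +/-1 into an equivalent compare against zero, which is exactly the information the record-form instruction provides for free. A condensed restatement of the mapping, using a hypothetical standalone enum instead of the PPC::Predicate encoding and its hint bits:

#include <cstdint>

enum class Cond { LT, LE, GT, GE };

// Rewrite (x <cond> imm) with imm in {-1, +1} as a compare against 0.
// Returns false for any (imm, cond) pair that is left untouched.
static bool normalizeToZero(int16_t Imm, Cond &C) {
  if (Imm == -1 && C == Cond::GT) { C = Cond::GE; return true; } // x > -1  ->  x >= 0
  if (Imm == -1 && C == Cond::LE) { C = Cond::LT; return true; } // x <= -1 ->  x <  0
  if (Imm ==  1 && C == Cond::LT) { C = Cond::LE; return true; } // x <  1  ->  x <= 0
  if (Imm ==  1 && C == Cond::GE) { C = Cond::GT; return true; } // x >= 1  ->  x >  0
  return false;
}

In the pass itself the rewritten predicate is queued in PredsToUpdate and written back into the branch operand; the rewrites are only attempted when the comparison is not restricted to equality, since they assume a signed ordering.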
@@ -1992,7 +2018,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::INLINEASM) { + if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); @@ -2358,13 +2384,6 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; It++; unsigned Reg = MI.getOperand(i).getReg(); - // MachineInstr::readsRegister only returns true if the machine - // instruction reads the exact register or its super-register. It - // does not consider uses of sub-registers which seems like strange - // behaviour. Nonetheless, if we end up with a 64-bit register here, - // get the corresponding 32-bit register to check. - if (PPC::G8RCRegClass.contains(Reg)) - Reg = Reg - PPC::X0 + PPC::R0; // Is this register defined by some form of add-immediate (including // load-immediate) within this basic block? @@ -2381,7 +2400,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( return &*It; } break; - } else if (It->readsRegister(Reg, &getRegisterInfo())) + } else if (It->readsRegister(Reg, &getRegisterInfo())) // If we see another use of this reg between the def and the MI, // we want to flag it so the def isn't deleted. SeenIntermediateUse = true; @@ -2424,6 +2443,83 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0]; } +void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, + unsigned RegNo) const { + const MachineRegisterInfo &MRI = + StartMI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA()) + return; + + // Instructions between [StartMI, EndMI] should be in the same basic block. + assert((StartMI.getParent() == EndMI.getParent()) && + "Instructions are not in same basic block"); + + bool IsKillSet = false; + + auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) { + MachineOperand &MO = MI.getOperand(Index); + if (MO.isReg() && MO.isUse() && MO.isKill() && + getRegisterInfo().regsOverlap(MO.getReg(), RegNo)) + MO.setIsKill(false); + }; + + // Set killed flag for EndMI. + // No need to do anything if EndMI defines RegNo. + int UseIndex = + EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo()); + if (UseIndex != -1) { + EndMI.getOperand(UseIndex).setIsKill(true); + IsKillSet = true; + // Clear killed flag for other EndMI operands related to RegNo. In some + // unexpected cases, killed may be set multiple times for the same register + // operand in the same MI. + for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i) + if (i != UseIndex) + clearOperandKillInfo(EndMI, i); + } + + // Walk the instructions in reverse order (EndMI -> StartMI]. + MachineBasicBlock::reverse_iterator It = EndMI; + MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend(); + // EndMI has been handled above, skip it here. + It++; + MachineOperand *MO = nullptr; + for (; It != E; ++It) { + // Skip instructions which could not be a def/use of RegNo. + if (It->isDebugInstr() || It->isPosition()) + continue; + + // Clear killed flag for all It operands related to RegNo. In some + // unexpected cases, killed may be set multiple times for the same register + // operand in the same MI. 
+ for (int i = 0, e = It->getNumOperands(); i != e; ++i) + clearOperandKillInfo(*It, i); + + // If killed is not set, set killed for its last use or set dead for its def + // if no use is found. + if (!IsKillSet) { + if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) { + // Use found, set it killed. + IsKillSet = true; + MO->setIsKill(true); + continue; + } else if ((MO = It->findRegisterDefOperand(RegNo, false, true, + &getRegisterInfo()))) { + // No use found, set dead for its def. + assert(&*It == &StartMI && "No new def between StartMI and EndMI."); + MO->setIsDead(true); + break; + } + } + + if ((&*It) == &StartMI) + break; + } + // Ensure RegNo liveness is killed after EndMI. + assert((IsKillSet || (MO && MO->isDead())) && + "RegNo should be killed or dead"); +} + // If this instruction has an immediate form and one of its operands is a // result of a load-immediate or an add-immediate, convert it to // the immediate form if the constant is in range. @@ -2440,8 +2536,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; assert(ForwardingOperand < MI.getNumOperands() && "The forwarding operand needs to be valid at this point"); - bool KillFwdDefMI = !SeenIntermediateUse && - MI.getOperand(ForwardingOperand).isKill(); + bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); + bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; + unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); if (KilledDef && KillFwdDefMI) *KilledDef = DefMI; @@ -2450,8 +2547,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by an add-immediate, // try to convert it. - if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand, - *DefMI, KillFwdDefMI)) + if (HasImmForm && + transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI, + KillFwdDefMI)) return true; if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) || @@ -2466,7 +2564,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by LI, convert it now. if (HasImmForm) - return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm); + return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm); bool ReplaceWithLI = false; bool Is64BitLI = false; @@ -2486,6 +2584,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, case PPC::CMPLDI: { // Doing this post-RA would require dataflow analysis to reliably find uses // of the CR register set by the compare. + // No need to fix up the killed/dead flag since this transformation is only + // valid before RA. if (PostRA) return false; // If a compare-immediate is fed by an immediate and is itself an input of @@ -2662,6 +2762,14 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, if (KilledDef && SetCR) *KilledDef = nullptr; replaceInstrWithLI(MI, LII); + + // Fix up the killed/dead flag after the transformation. 
+ // Pattern: + // ForwardingOperandReg = LI imm1 + // y = op2 imm2, ForwardingOperandReg(killed) + if (IsForwardingOperandKilled) + fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg); + LLVM_DEBUG(dbgs() << "With:\n"); LLVM_DEBUG(MI.dump()); return true; @@ -2669,10 +2777,6 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -static bool isVFReg(unsigned Reg) { - return PPC::VFRCRegClass.contains(Reg); -} - bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III, bool PostRA) const { unsigned Opc = MI.getOpcode(); @@ -3007,7 +3111,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSSPX: if (PostRA) { - if (isVFReg(MI.getOperand(0).getReg())) + if (isVFRegister(MI.getOperand(0).getReg())) III.ImmOpcode = PPC::LXSSP; else { III.ImmOpcode = PPC::LFS; @@ -3021,7 +3125,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSDX: if (PostRA) { - if (isVFReg(MI.getOperand(0).getReg())) + if (isVFRegister(MI.getOperand(0).getReg())) III.ImmOpcode = PPC::LXSD; else { III.ImmOpcode = PPC::LFD; @@ -3039,7 +3143,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSSPX: if (PostRA) { - if (isVFReg(MI.getOperand(0).getReg())) + if (isVFRegister(MI.getOperand(0).getReg())) III.ImmOpcode = PPC::STXSSP; else { III.ImmOpcode = PPC::STFS; @@ -3053,7 +3157,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSDX: if (PostRA) { - if (isVFReg(MI.getOperand(0).getReg())) + if (isVFRegister(MI.getOperand(0).getReg())) III.ImmOpcode = PPC::STXSD; else { III.ImmOpcode = PPC::STFD; @@ -3110,7 +3214,7 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { } } -// Check if the 'MI' that has the index OpNoForForwarding +// Check if the 'MI' that has the index OpNoForForwarding // meets the requirement described in the ImmInstrInfo. bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III, @@ -3156,7 +3260,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, MachineOperand *&RegMO) const { unsigned Opc = DefMI.getOpcode(); if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8) - return false; + return false; assert(DefMI.getNumOperands() >= 3 && "Add inst must have at least three operands"); @@ -3169,11 +3273,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, return isAnImmediateOperand(*ImmMO); } -bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO, - const MachineInstr &DefMI, - const MachineInstr &MI, - bool KillDefMI - ) const { +bool PPCInstrInfo::isRegElgibleForForwarding( + const MachineOperand &RegMO, const MachineInstr &DefMI, + const MachineInstr &MI, bool KillDefMI, + bool &IsFwdFeederRegKilled) const { // x = addi y, imm // ... // z = lfdx 0, x -> z = lfd imm(y) @@ -3184,14 +3287,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO, if (MRI.isSSA()) return false; - // MachineInstr::readsRegister only returns true if the machine - // instruction reads the exact register or its super-register. It - // does not consider uses of sub-registers which seems like strange - // behaviour. Nonetheless, if we end up with a 64-bit register here, - // get the corresponding 32-bit register to check. unsigned Reg = RegMO.getReg(); - if (PPC::G8RCRegClass.contains(Reg)) - Reg = Reg - PPC::X0 + PPC::R0; // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. 
MachineBasicBlock::const_reverse_iterator It = MI; @@ -3200,15 +3296,17 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO, for (; It != E; ++It) { if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) return false; + else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) + IsFwdFeederRegKilled = true; // Made it to DefMI without encountering a clobber. if ((&*It) == &DefMI) break; } assert((&*It) == &DefMI && "DefMI is missing"); - // If DefMI also uses the register to be forwarded, we can only forward it + // If DefMI also defines the register to be forwarded, we can only forward it // if DefMI is being erased. - if (DefMI.readsRegister(Reg, &getRegisterInfo())) + if (DefMI.modifiesRegister(Reg, &getRegisterInfo())) return KillDefMI; return true; @@ -3271,11 +3369,9 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, // is the literal zero, attempt to forward the source of the add-immediate to // the corresponding D-Form instruction with the displacement coming from // the immediate being added. -bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI, - const ImmInstrInfo &III, - unsigned OpNoForForwarding, - MachineInstr &DefMI, - bool KillDefMI) const { +bool PPCInstrInfo::transformToImmFormFedByAdd( + MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding, + MachineInstr &DefMI, bool KillDefMI) const { // RegMO ImmMO // | | // x = addi reg, imm <----- DefMI @@ -3300,10 +3396,19 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI, if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm)) return false; + bool IsFwdFeederRegKilled = false; // Check if the RegMO can be forwarded to MI. - if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI)) + if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI, + IsFwdFeederRegKilled)) return false; + // Get killed info in case a fixup is needed after the transformation. + unsigned ForwardKilledOperandReg = ~0U; + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + bool PostRA = !MRI.isSSA(); + if (PostRA && MI.getOperand(OpNoForForwarding).isKill()) + ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg(); + // We know that the MI and DefMI both meet the pattern, and // the Imm also meets the requirement of the new Imm-form. // It is safe to do the transformation now. @@ -3327,7 +3432,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI, // Otherwise, it is a Constant Pool Index (CPI) or a Global, // which is in fact a relocation. We need to replace the special zero // register with ImmMO. - // Before that, we need to fixup the target flags for imm. + // Before that, we need to fixup the target flags for imm. // For some reason, we fail to set the flag for the ImmMO if it is CPI. if (DefMI.getOpcode() == PPC::ADDItocL) ImmMO->setTargetFlags(PPCII::MO_TOC_LO); @@ -3354,6 +3459,22 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI, // Update the opcode. MI.setDesc(get(III.ImmOpcode)); + // Fix up killed/dead flag after transformation. 
+ // Pattern 1: + // x = ADD KilledFwdFeederReg, imm + // n = opn KilledFwdFeederReg(killed), regn + // y = XOP 0, x + // Pattern 2: + // x = ADD reg(killed), imm + // y = XOP 0, x + if (IsFwdFeederRegKilled || RegMO->isKill()) + fixupIsDeadOrKill(DefMI, MI, RegMO->getReg()); + // Pattern 3: + // ForwardKilledOperandReg = ADD reg, imm + // y = XOP 0, ForwardKilledOperandReg(killed) + if (ForwardKilledOperandReg != ~0U) + fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg); + LLVM_DEBUG(dbgs() << "With:\n"); LLVM_DEBUG(MI.dump()); @@ -3363,6 +3484,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, unsigned ConstantOpNo, + MachineInstr &DefMI, int64_t Imm) const { MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI.isSSA(); @@ -3401,6 +3523,11 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, return false; } + // Get killed info in case a fixup is needed after the transformation. + unsigned ForwardKilledOperandReg = ~0U; + if (PostRA && MI.getOperand(ConstantOpNo).isKill()) + ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); + unsigned Opc = MI.getOpcode(); bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo; @@ -3483,6 +3610,13 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, } } } + + // Fix up killed/dead flag after transformation. + // Pattern: + // ForwardKilledOperandReg = LI imm + // y = XOP reg, ForwardKilledOperandReg(killed) + if (ForwardKilledOperandReg != ~0U) + fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg); return true; } @@ -3784,3 +3918,133 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, } return false; } + +bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { + return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ)); +} + +bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, + MachineInstr *&CmpInst) const { + MachineBasicBlock *LoopEnd = L.getBottomBlock(); + MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); + // We really "analyze" only CTR loops right now. + if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) { + IndVarInst = nullptr; + CmpInst = &*I; + return false; + } + return true; +} + +MachineInstr * +PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { + + unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop); + + // The loop set-up instruction should be in the preheader. + for (auto &I : PreHeader.instrs()) + if (I.getOpcode() == LOOPi) + return &I; + return nullptr; +} + +unsigned PPCInstrInfo::reduceLoopCount( + MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, + MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond, + SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter, + unsigned MaxIter) const { + // We expect a hardware loop currently. This means that IndVar is set + // to null, and the compare is the ENDLOOP instruction. 
+  assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
+  MachineFunction *MF = MBB.getParent();
+  DebugLoc DL = Cmp.getDebugLoc();
+  MachineInstr *Loop = findLoopInstr(PreHeader);
+  if (!Loop)
+    return 0;
+  unsigned LoopCountReg = Loop->getOperand(0).getReg();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+
+  if (!LoopCount)
+    return 0;
+  // If the loop trip count is a compile-time value, then just change the
+  // value.
+  if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
+    int64_t Offset = LoopCount->getOperand(1).getImm();
+    if (Offset <= 1) {
+      LoopCount->eraseFromParent();
+      Loop->eraseFromParent();
+      return 0;
+    }
+    LoopCount->getOperand(1).setImm(Offset - 1);
+    return Offset - 1;
+  }
+
+  // The loop trip count is a run-time value. We need to subtract one from the
+  // trip count and insert a branch later to check if we're done with the loop.
+
+  // Since the BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
+  // we don't need to generate anything else here.
+  Cond.push_back(MachineOperand::CreateImm(0));
+  Cond.push_back(MachineOperand::CreateReg(
+      Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
+  return LoopCountReg;
+}
+
+// Return true if we can get the base operand, the byte offset of an
+// instruction and the memory width. Width is the size of memory that is being
+// loaded/stored.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+    const MachineInstr &LdSt,
+    const MachineOperand *&BaseReg,
+    int64_t &Offset,
+    unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
+  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+  // Handle only loads/stores with base register followed by immediate offset.
+  if (LdSt.getNumExplicitOperands() != 3)
+    return false;
+  if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+    return false;
+
+  if (!LdSt.hasOneMemOperand())
+    return false;
+
+  Width = (*LdSt.memoperands_begin())->getSize();
+  Offset = LdSt.getOperand(1).getImm();
+  BaseReg = &LdSt.getOperand(2);
+  return true;
+}
+
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis * /*AA*/) const {
+  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+    return false;
+
+  // Retrieve the base register, offset from the base register and width. Width
+  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If the
+  // base registers are identical, and the offset of the lower memory access
+  // plus its width does not reach the offset of the higher memory access,
+  // then the memory accesses are disjoint.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  int64_t OffsetA = 0, OffsetB = 0;
+  unsigned int WidthA = 0, WidthB = 0;
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+      int LowOffset = std::min(OffsetA, OffsetB);
+      int HighOffset = std::max(OffsetA, OffsetB);
+      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+      if (LowOffset + LowWidth <= HighOffset)
+        return true;
+    }
+  }
+  return false;
+}
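The core of areMemAccessesTriviallyDisjoint above is a one-dimensional interval check: two accesses off the same base register cannot overlap when the lower one ends at or before the higher one begins. A small self-contained illustration of the arithmetic (hypothetical helper, not part of this patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Disjoint iff [OffsetA, OffsetA+WidthA) and [OffsetB, OffsetB+WidthB) do not
// intersect, mirroring the LowOffset/LowWidth/HighOffset logic above.
static bool intervalsDisjoint(int64_t OffsetA, unsigned WidthA,
                              int64_t OffsetB, unsigned WidthB) {
  int64_t Low = std::min(OffsetA, OffsetB);
  int64_t High = std::max(OffsetA, OffsetB);
  unsigned LowWidth = (Low == OffsetA) ? WidthA : WidthB;
  return Low + LowWidth <= High;
}

int main() {
  assert(intervalsDisjoint(0, 8, 8, 8));  // ld 0(r3) vs. ld 8(r3): disjoint
  assert(!intervalsDisjoint(0, 8, 4, 4)); // ld 0(r3) vs. lwz 4(r3): overlap
  return 0;
}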
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 7ed558b835af..70fb757e8f1e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 
-#include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
@@ -66,9 +64,6 @@ enum {
   /// Shift count to bypass PPC970 flags
   NewDef_Shift = 6,
 
-  /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
-  /// register (v0-v31).
-  UseVSXReg = 0x1 << NewDef_Shift,
   /// This instruction is an X-Form memory operation.
   XFormMemOp = 0x1 << (NewDef_Shift+1)
 };
 
@@ -129,12 +124,12 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   // If the inst has imm-form and one of its operand is produced by a LI,
   // put the imm into the inst directly and remove the LI if possible.
   bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
-                                 unsigned ConstantOpNo, int64_t Imm) const;
+                                 unsigned ConstantOpNo, MachineInstr &DefMI,
+                                 int64_t Imm) const;
   // If the inst has imm-form and one of its operand is produced by an
   // add-immediate, try to transform it when possible.
   bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
-                                  unsigned ConstantOpNo,
-                                  MachineInstr &DefMI,
+                                  unsigned ConstantOpNo, MachineInstr &DefMI,
                                   bool KillDefMI) const;
   // Try to find that, if the instruction 'MI' contains any operand that
   // could be forwarded from some inst that feeds it. If yes, return the
@@ -159,8 +154,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                 int64_t &Imm) const;
   bool isRegElgibleForForwarding(const MachineOperand &RegMO,
                                  const MachineInstr &DefMI,
-                                 const MachineInstr &MI,
-                                 bool KillDefMI) const;
+                                 const MachineInstr &MI, bool KillDefMI,
+                                 bool &IsFwdFeederRegKilled) const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
   virtual void anchor();
@@ -362,6 +357,22 @@ public:
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
+
+  /// Return true if we can get the base operand, the byte offset of an
+  /// instruction and the memory width. Width is the size of memory that is
+  /// being loaded/stored (e.g. 1, 2, 4, 8).
+  bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+                                    const MachineOperand *&BaseOp,
+                                    int64_t &Offset, unsigned &Width,
+                                    const TargetRegisterInfo *TRI) const;
+
+  /// Return true if two MIs access different memory addresses and false
+  /// otherwise.
+  bool
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
+                                  AliasAnalysis *AA = nullptr) const override;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
@@ -412,6 +423,18 @@ public:
   bool convertToImmediateForm(MachineInstr &MI,
                               MachineInstr **KilledDef = nullptr) const;
+
+  /// Fix up the killed/dead flag for register \p RegNo between instructions
+  /// [\p StartMI, \p EndMI]. Some PostRA transformations may violate register
+  /// killed/dead flag semantics; this function can be called to fix them up.
+  /// Before calling this function,
+  /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
+  /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
+  ///    and that the only possible definition of \p RegNo is \p StartMI or
+  ///    \p EndMI.
+  /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in
+  ///    the same basic block.
+  void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+                         unsigned RegNo) const;
   void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
   void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
                                   int64_t Imm) const;
@@ -429,14 +452,55 @@ public:
   /// operands).
   static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
                                       unsigned OpNo) {
-    if (Desc.TSFlags & PPCII::UseVSXReg) {
-      if (isVRRegister(Reg))
-        Reg = PPC::VSX32 + (Reg - PPC::V0);
-      else if (isVFRegister(Reg))
-        Reg = PPC::VSX32 + (Reg - PPC::VF0);
+    int16_t regClass = Desc.OpInfo[OpNo].RegClass;
+    switch (regClass) {
+    // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31,
+    // VSX32-VSX63 during encoding/disassembling.
+    case PPC::VSSRCRegClassID:
+    case PPC::VSFRCRegClassID:
+      if (isVFRegister(Reg))
+        return PPC::VSX32 + (Reg - PPC::VF0);
+      break;
+    // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31,
+    // VSX32-VSX63 during encoding/disassembling.
+    case PPC::VSRCRegClassID:
+      if (isVRRegister(Reg))
+        return PPC::VSX32 + (Reg - PPC::V0);
+      break;
+    // Other register classes don't need any mapping.
+    default:
+      break;
     }
     return Reg;
   }
+
+  /// Check whether \p Opcode is BDNZ (decrement CTR and branch if it is
+  /// still nonzero).
+  bool isBDNZ(unsigned Opcode) const;
+
+  /// Find the hardware loop instruction used to set up the specified loop.
+  /// On PPC, we have two instructions used to set up the hardware loop
+  /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
+  /// instructions to indicate the end of a loop.
+  MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
+
+  /// Analyze the loop code to find the loop induction variable and the
+  /// compare instruction used to compute the number of iterations. Currently,
+  /// we only analyze loops that are controlled using hardware loops. In this
+  /// case, the induction variable instruction is null. For all other cases,
+  /// this function returns true, which means we're unable to analyze the
+  /// loop. \p IndVarInst and \p CmpInst will return new values when we can
+  /// analyze the readonly loop \p L; otherwise they are left unchanged.
+  bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                   MachineInstr *&CmpInst) const override;
+
+  /// Generate code to reduce the loop iteration by one and check if the loop
+  /// is finished. Return the value/register of the new loop count. We need
+  /// this function when peeling off one or more iterations of a loop. This
+  /// function assumes the last iteration is peeled first.
+  unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
+                           MachineInstr *IndVar, MachineInstr &Cmp,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           SmallVectorImpl<MachineInstr *> &PrevInsts,
+                           unsigned Iter, unsigned MaxIter) const override;
 };
 
 }
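The fixupIsDeadOrKill contract documented above amounts to a short walk over the block: clear any stale kill of the register strictly between the two endpoints, then mark the last use at EndMI as the kill. A rough sketch under those same preconditions (illustrative only, using the generic MachineInstr API rather than the routine this patch adds):

// Assumes StartMI and EndMI share a basic block and Reg is dead after EndMI.
static void fixupKillFlagSketch(MachineInstr &StartMI, MachineInstr &EndMI,
                                unsigned Reg, const TargetRegisterInfo *TRI) {
  for (MachineBasicBlock::iterator It = std::next(StartMI.getIterator()),
                                   E = EndMI.getIterator();
       It != E; ++It)
    It->clearRegisterKills(Reg, TRI);  // drop kill flags that no longer hold
  if (MachineOperand *MO = EndMI.findRegisterUseOperand(Reg, false, TRI))
    MO->setIsKill(true);               // the last use now kills Reg
}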
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 77aa4fe3d415..c313337047f0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -231,6 +230,18 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
                                         SDTCisSameAs<1,2>]>,
                            []>;
 
+def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
+                           SDTypeProfile<1, 2,
+                                         [SDTCisVT<0, f64>, SDTCisVT<1,i32>,
+                                         SDTCisVT<1,i32>]>,
+                           []>;
+
+def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE",
+                            SDTypeProfile<1, 2,
+                                          [SDTCisVT<0, i32>, SDTCisVT<1, f64>,
+                                          SDTCisPtrTy<2>]>,
+                            []>;
+
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
                            [SDNPHasChain, SDNPOutGlue]>;
@@ -458,6 +469,17 @@ def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
   return !isOffsetMultipleOf(N, 16);
 }]>;
 
+// PatFrag for a binary operation whose operands are both non-constant.
+class BinOpWithoutSImm16Operand<SDNode opcode> :
+  PatFrag<(ops node:$left, node:$right), (opcode node:$left, node:$right), [{
+  int16_t Imm;
+  return !isIntS16Immediate(N->getOperand(0), Imm)
+          && !isIntS16Immediate(N->getOperand(1), Imm);
+}]>;
+
+def add_without_simm16 : BinOpWithoutSImm16Operand<add>;
+def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
 
@@ -546,10 +568,6 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
 def crrc : RegisterOperand<CRRC> {
   let ParserMatchClass = PPCRegCRRCAsmOperand;
 }
-def crrc0 : RegisterOperand<CRRC0> {
-  let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-
 def PPCRegSPERCAsmOperand : AsmOperandClass {
   let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
 }
@@ -883,11 +901,24 @@ def pred : Operand<OtherVT> {
 }
 
 // Define PowerPC specific addressing mode.
-def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
-def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+
+// d-form
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
+// ds-form
+def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+// dq-form
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+
+// The forms below are all x-form addressing modes; we use three different
+// ones so we can make an accurate check for x-form instructions in ISEL.
+// x-form addressing mode whose associated displacement form is D.
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; // "stbx"
+// x-form addressing mode whose associated displacement form is DS.
+def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4", [], []>; // "stdx"
+// x-form addressing mode whose associated displacement form is DQ.
+def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16", [], []>; // "stxvx"
+
 def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
-def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
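The D/DS/DQ split drawn above reflects how the 16-bit displacement field is encoded: DS-form instructions reuse the low two bits (so the offset must be a multiple of 4) and DQ-form the low four (a multiple of 16). A hypothetical predicate capturing the constraint these ComplexPatterns check:

#include <cstdint>

// Multiple = 1 corresponds to D-form ("stb"), 4 to DS-form ("std"),
// and 16 to DQ-form ("stxv").
static bool fitsDisplacementForm(int64_t Offset, int64_t Multiple) {
  return Offset >= -32768 && Offset <= 32767 && (Offset % Multiple) == 0;
}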
 
 // The address in a single register. This is used with the SjLj
 // pseudo-instructions.
@@ -1311,6 +1342,15 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
   }
 }
 
+// Set the float rounding mode.
+let Uses = [RM], Defs = [RM] in {
+def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
+                      "#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
+
+def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
+                      "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc:$in))]>;
+}
+
 let Defs = [LR] in
   def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
@@ -1437,6 +1477,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
     def BCLn : BForm_4<16, 4, 0, 1, (outs),
                        (ins crbitrc:$bi, condbrtarget:$dst),
                        "bcl 4, $bi, $dst">;
+    def BL_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
+                                        (outs), (ins calltarget:$func),
+                                        "bl $func\n\tnop", IIC_BrB, []>;
   }
 }
 let Uses = [CTR, RM] in {
@@ -2514,6 +2557,7 @@ def CRORC  : XLForm_1<19, 417, (outs crbitrc:$CRD),
                       [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
 
 let isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
                           "creqv $dst, $dst, $dst", IIC_BrCR,
                           [(set i1:$dst, 1)]>;
@@ -2521,6 +2565,7 @@ def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
 def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
                           "crxor $dst, $dst, $dst", IIC_BrCR,
                           [(set i1:$dst, 0)]>;
+}
 
 let Defs = [CR1EQ], CRD = 6 in {
 def CR6SET  : XLForm_1_ext<19, 289, (outs), (ins),
@@ -2568,7 +2613,7 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
-let Pattern = [(int_ppc_mtctr i32:$rS)] in
+let Pattern = [(int_set_loop_iterations i32:$rS)] in
 def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
                               "mtctr $rS", IIC_SprMTSPR>,
                 PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -2995,9 +3040,16 @@ def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
 
 // Calls
 def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
          (BL tglobaladdr:$dst)>;
+
 def : Pat<(PPCcall (i32 texternalsym:$dst)),
          (BL texternalsym:$dst)>;
 
+// Calls for AIX only
+def : Pat<(PPCcall (i32 mcsym:$dst)),
+          (BL mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
+          (BL_NOP mcsym:$dst)>;
+
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
          (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
 
@@ -4073,6 +4125,10 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
 
 def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
 
+let Defs = [CR0] in
+def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+              "slbfee.
$RT, $RB", IIC_SprSLBFEE, []>, isDOT; + def TLBIA : XForm_0<31, 370, (outs), (ins), "tlbia", IIC_SprTLBIA, []>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td index ef589ad01fd7..d67041d46d9f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td @@ -1,9 +1,8 @@ //===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td index 9f5891a45f22..935c3044ae47 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -1,9 +1,8 @@ //=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -512,7 +511,7 @@ def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src), def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>; -def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), +def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB), "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>; def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>; @@ -887,4 +886,14 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>; + + +def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)), + (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>; + +def : Pat<(i32 (PPCextract_spe f64:$rA, 1)), + (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>; +def : Pat<(i32 (PPCextract_spe f64:$rA, 0)), + (i32 (EXTRACT_SUBREG $rA, sub_32))>; + } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 0f073388dc74..07f38a61d098 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1,9 +1,8 @@ //===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -54,6 +53,15 @@ def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass { def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> { let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand; } + +def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [ + SDTCisVT<0, v4f32>, SDTCisPtrTy<1> +]>; + +def SDT_PPCfpextlh : SDTypeProfile<1, 1, [ + SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32> +]>; + // Little-endian-specific nodes. def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -85,6 +93,10 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; +def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>; +def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, ValueType OutTy, ValueType InTy> { @@ -124,7 +136,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. -let UseVSXReg = 1 in { let hasSideEffects = 0 in { // VSX instructions don't have side effects. let Uses = [RM] in { @@ -841,12 +852,12 @@ let Uses = [RM] in { "xxlxor $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>; } // isCommutable - let isCodeGenOnly = 1 in - def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), + + let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isReMaterializable = 1 in { + def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set v4i32:$XT, (v4i32 immAllZerosV))]>; - - let isCodeGenOnly = 1 in { def XXLXORdpz : XX3Form_SetZero<60, 154, (outs vsfrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, @@ -895,11 +906,10 @@ let Uses = [RM] in { (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; let isCodeGenOnly = 1 in def XXSPLTWs : XX2Form_2<60, 164, - (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM), + (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; } // hasSideEffects -} // UseVSXReg = 1 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. @@ -961,6 +971,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def : Pat<(v4i32 (vnot_ppc v4i32:$A)), (v4i32 (XXLNOR $A, $A))>; +def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A), + (and v4i32:$B, v4i32:$C))), + (v4i32 (XXSEL $A, $B, $C))>; + let Predicates = [IsBigEndian] in { def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; @@ -1063,6 +1077,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; +def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>; + // Loads. 
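The new XXSEL pattern above is the classic bitwise-select idiom: each result bit is taken from $A where the mask $C is clear and from $B where it is set. One 32-bit lane of it, as plain C++ (illustrative):

#include <cstdint>

static uint32_t bitSelect(uint32_t A, uint32_t B, uint32_t C) {
  return (~C & A) | (B & C); // exactly the dag the pattern rewrites to XXSEL
}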
let Predicates = [HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -1176,6 +1192,15 @@ def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC), def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC), (XXSEL $vC, $vB, $vA)>; +def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMAXSP $src1, $src2))>; +def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMINSP $src1, $src2))>; +def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMAXDP $src1, $src2))>; +def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMINDP $src1, $src2))>; + let Predicates = [IsLittleEndian] in { def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; @@ -1248,7 +1273,7 @@ def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. - let isCommutable = 1, UseVSXReg = 1 in { + let isCommutable = 1 in { def XXLEQV : XX3Form<60, 186, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxleqv $XT, $XA, $XB", IIC_VecGeneral, @@ -1258,12 +1283,11 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. "xxlnand $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, v4i32:$XB)))]>; - } // isCommutable, UseVSXReg + } // isCommutable def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; - let UseVSXReg = 1 in { def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, @@ -1312,7 +1336,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. "#STIWX", [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; } // mayStore - } // UseVSXReg = 1 def : Pat<(f64 (extloadf32 xoaddr:$src)), (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; @@ -1342,7 +1365,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; - let UseVSXReg = 1 in { // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { def XSADDSP : XX3Form<60, 0, @@ -1354,7 +1376,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. "xsmulsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>; } // isCommutable - + def XSSUBSP : XX3Form<60, 8, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xssubsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; def XSDIVSP : XX3Form<60, 24, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsdivsp $XT, $XA, $XB", IIC_FPDivS, @@ -1374,10 +1399,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. (outs vssrc:$XT), (ins vssrc:$XB), "xsrsqrtesp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfrsqrte f32:$XB))]>; - def XSSUBSP : XX3Form<60, 8, - (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), - "xssubsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; // FMA Instructions let BaseName = "XSMADDASP" in { @@ -1470,7 +1491,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
"xscvdpspn $XT, $XB", IIC_VecFP, []>; def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), "xscvspdpn $XT, $XB", IIC_VecFP, []>; - } // UseVSXReg = 1 let Predicates = [IsLittleEndian] in { def : Pat<DWToSPExtractConv.El0SS1, @@ -1514,10 +1534,22 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)), + (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)), + (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)), + (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)), + (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; } // AddedComplexity = 400 } // HasP8Vector -let UseVSXReg = 1, AddedComplexity = 400 in { +let AddedComplexity = 400 in { let Predicates = [HasDirectMove] in { // VSX direct move instructions def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), @@ -1525,7 +1557,7 @@ let Predicates = [HasDirectMove] in { [(set i64:$rA, (PPCmfvsr f64:$XT))]>, Requires<[In64BitMode]>; let isCodeGenOnly = 1 in - def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT), + def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT), "mfvsrd $rA, $XT", IIC_VecGeneral, []>, Requires<[In64BitMode]>; @@ -1557,7 +1589,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in { []>, Requires<[In64BitMode]>; } // IsISA3_0, HasDirectMove -} // UseVSXReg = 1 +} // AddedComplexity = 400 // We want to parse this from asm, but we don't want to emit this as it would // be emitted with a VSX reg. So leave Emit = 0 here. 
@@ -2415,7 +2447,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { list<dag> pattern> : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT; - let UseVSXReg = 1 in { // [PO T XO B XO BX /] class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc, list<dag> pattern> @@ -2434,7 +2465,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { InstrItinClass itin, list<dag> pattern> : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB), !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>; - } // UseVSXReg = 1 // [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc, @@ -2482,69 +2512,70 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { let isCommutable = 1 in { def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp", [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>; + def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp", + [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>; + } + def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , + [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>; + def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp", + [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>; + // Square-Root + def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp", + [(set f128:$vT, (fsqrt f128:$vB))]>; + // (Negative) Multiply-{Add/Subtract} + def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp", + [(set f128:$vT, + (fma f128:$vA, f128:$vB, + f128:$vTi))]>; + def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" , + [(set f128:$vT, + (fma f128:$vA, f128:$vB, + (fneg f128:$vTi)))]>; + def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp", + [(set f128:$vT, + (fneg (fma f128:$vA, f128:$vB, + f128:$vTi)))]>; + def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp", + [(set f128:$vT, + (fneg (fma f128:$vA, f128:$vB, + (fneg f128:$vTi))))]>; + + let isCommutable = 1 in { def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo", [(set f128:$vT, (int_ppc_addf128_round_to_odd f128:$vA, f128:$vB))]>; - def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp", - [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>; def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo", [(set f128:$vT, (int_ppc_mulf128_round_to_odd f128:$vA, f128:$vB))]>; } - - def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , - [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>; def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", [(set f128:$vT, (int_ppc_subf128_round_to_odd f128:$vA, f128:$vB))]>; - def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp", - [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>; def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", [(set f128:$vT, (int_ppc_divf128_round_to_odd f128:$vA, f128:$vB))]>; - - // Square-Root - def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp", - [(set f128:$vT, (fsqrt f128:$vB))]>; def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", [(set f128:$vT, (int_ppc_sqrtf128_round_to_odd f128:$vB))]>; - // (Negative) Multiply-{Add/Subtract} - def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp", - [(set f128:$vT, - (fma f128:$vA, f128:$vB, - f128:$vTi))]>; def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo", [(set f128:$vT, (int_ppc_fmaf128_round_to_odd f128:$vA,f128:$vB,f128:$vTi))]>; - def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" , - [(set f128:$vT, - (fma f128:$vA, f128:$vB, - (fneg f128:$vTi)))]>; def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" , [(set f128:$vT, (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi)))]>; - def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp", - [(set f128:$vT, - (fneg (fma f128:$vA, f128:$vB, - 
f128:$vTi)))]>; def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo", [(set f128:$vT, (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, f128:$vTi)))]>; - def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp", - [(set f128:$vT, - (fneg (fma f128:$vA, f128:$vB, - (fneg f128:$vTi))))]>; def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo", [(set f128:$vT, (fneg (int_ppc_fmaf128_round_to_odd @@ -2572,8 +2603,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // DP/QP Compare Exponents def XSCMPEXPDP : XX3Form_1<60, 59, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), - "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>, - UseVSXReg; + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; // DP Compare ==, >=, >, != @@ -2631,7 +2661,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))), (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; - let UseVSXReg = 1 in { //===--------------------------------------------------------------------===// // Round to Floating-Point Integer Instructions @@ -2648,8 +2677,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { [(set v4f32:$XT, (int_ppc_vsx_xvcvsphp v4f32:$XB))]>; - } // UseVSXReg = 1 - // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a // separate pattern so that it can convert the input register class from // VRRC(v8i16) to VSRC. @@ -2691,7 +2718,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Insert Exponent DP/QP // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), - "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg; + "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>; // vB NOTE: only vB.dword[0] is used, that's why we don't use // X_VT5_VA5_VB5 form def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), @@ -2712,7 +2739,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v2i64 (XSXEXPQP $vA)), sub_64)))>; // Vector Insert Word - let UseVSXReg = 1 in { // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. 
def XXINSERTW : XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), @@ -2726,7 +2752,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; - } // UseVSXReg = 1 // Vector Insert Exponent DP/SP def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc, @@ -2759,20 +2784,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { //===--------------------------------------------------------------------===// // Test Data Class SP/DP/QP - let UseVSXReg = 1 in { def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; - } // UseVSXReg = 1 def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; // Vector Test Data Class SP/DP - let UseVSXReg = 1 in { def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, @@ -2783,7 +2805,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, [(set v2i64: $XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; - } // UseVSXReg = 1 //===--------------------------------------------------------------------===// @@ -2824,7 +2845,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Vector Splat Immediate Byte def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), - "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg; + "xxspltib $XT, $IMM8", IIC_VecPerm, []>; //===--------------------------------------------------------------------===// // Vector/Scalar Load/Store Instructions @@ -2834,7 +2855,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { let mayLoad = 1, mayStore = 0 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), - "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg; + "lxv $XT, $src", IIC_LdStLFD, []>; // Load DWord def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src), "lxsd $vD, $src", IIC_LdStLFD, []>; @@ -2847,7 +2868,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand vtype, list<dag> pattern> : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src), - !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg; + !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>; // Load as Integer Byte/Halfword & Zero Indexed def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, @@ -2861,16 +2882,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Load Vector Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, - [(set v2f64:$XT, (load xaddr:$src))]>; + [(set v2f64:$XT, (load xaddrX16:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, - [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>, - UseVSXReg; + [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>; def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvll $XT, $src, $rB", IIC_LdStLoad, - [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>, - UseVSXReg; + [(set v4i32:$XT, (int_ppc_vsx_lxvll 
addr:$src, i64:$rB))]>; // Load Vector Word & Splat Indexed def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>; @@ -2881,7 +2900,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { let mayStore = 1, mayLoad = 0 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), - "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg; + "stxv $XT, $dst", IIC_LdStSTFD, []>; // Store DWord def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst), "stxsd $vS, $dst", IIC_LdStSTFD, []>; @@ -2893,7 +2912,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand vtype, list<dag> pattern> : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst), - !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg; + !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>; // Store as Integer Byte/Halfword Indexed def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, @@ -2901,8 +2920,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>; let isCodeGenOnly = 1 in { - def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>; - def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>; + def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>; + def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>; } // Store Vector Halfword*8/Byte*16 Indexed @@ -2911,21 +2930,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, - [(store v2f64:$XT, xaddr:$dst)]>; + [(store v2f64:$XT, xaddrX16:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form_memOp<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), "stxvl $XT, $dst, $rB", IIC_LdStLoad, [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst, - i64:$rB)]>, - UseVSXReg; + i64:$rB)]>; def STXVLL : XX1Form_memOp<31, 429, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), "stxvll $XT, $dst, $rB", IIC_LdStLoad, [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst, - i64:$rB)]>, - UseVSXReg; + i64:$rB)]>; } // mayStore let Predicates = [IsLittleEndian] in { @@ -3045,24 +3062,24 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { } // IsLittleEndian, HasP9Vector // D-Form Load/Store - def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(f128 (quadwOffsetLoad iqaddr:$src)), + def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; + def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; + def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; + def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)), (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>; - def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(quadwOffsetStore v4i32:$rS, 
iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(quadwOffsetStore f128:$rS, iqaddr:$dst), + def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst), (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>; - def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst), + def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst), + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; @@ -3159,109 +3176,109 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv $S, xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; 
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; // Scalar stores of i16 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv $S, xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; } // IsBigEndian, HasP9Vector let Predicates = [IsLittleEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 
(vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv $S, xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>; + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; // Scalar stores of i16 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv $S, xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - 
(STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; } // IsLittleEndian, HasP9Vector @@ -3273,53 +3290,97 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", - [(set f32:$XT, (load ixaddr:$src))]>; + [(set f32:$XT, (load iaddrX4:$src))]>; def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", - [(set f64:$XT, (load ixaddr:$src))]>; + [(set f64:$XT, (load iaddrX4:$src))]>; def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", - [(store f32:$XT, ixaddr:$dst)]>; + [(store f32:$XT, iaddrX4:$dst)]>; def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", - [(store f64:$XT, ixaddr:$dst)]>; + [(store f64:$XT, iaddrX4:$dst)]>; - def : Pat<(f64 (extloadf32 ixaddr:$src)), - (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; - def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))), - (f32 (DFLOADf32 ixaddr:$src))>; + def : Pat<(f64 (extloadf32 iaddrX4:$src)), + (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>; + def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), + (f32 (DFLOADf32 iaddrX4:$src))>; + def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>; + def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)), + (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>; let AddedComplexity = 400 in { // The following pseudoinstructions are used to ensure the utilization // of all 64 VSX registers. 
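The extloadf32/fpround folds above are sound because widening f32 to f64 is exact, so rounding straight back is the identity and a single 32-bit load (DFLOADf32) suffices. In C++ terms (illustrative):

static float extloadRoundTrip(const float *P) {
  double Widened = static_cast<double>(*P); // extloadf32: exact, no rounding
  return static_cast<float>(Widened);       // fpround: bit-identical to *P
}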
let Predicates = [IsLittleEndian, HasP9Vector] in { - def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; - def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>; - def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), (v2f64 (XXPERMDIs - (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; - def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), (v2f64 (XXPERMDIs - (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; - } + (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>; + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), iaddrX4:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + iaddrX4:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { - def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; - def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; - - def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), - (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; - def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), - (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; - } + def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, 
$A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ } // IsBigEndian, HasP9Vector
 }

 let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3455,14 +3516,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 } // IsLittleEndian, HasP9Vector

 // Convert (Un)Signed DWord in memory -> QP
- def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
- (f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
- def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
- (f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
- (f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
- (f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;

 // Convert Unsigned HWord in memory -> QP
 def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
@@ -3483,13 +3544,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 // Instructions for store(fptosi).
 // The 8-byte version is repeated here due to availability of D-Form STXSD.
def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8), + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - xaddr:$dst)>; + xaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8), + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - ixaddr:$dst)>; + iaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; @@ -3500,11 +3561,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8), - (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8), + (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; @@ -3514,13 +3575,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Instructions for store(fptoui). def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8), + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - xaddr:$dst)>; + xaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8), + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - ixaddr:$dst)>; + iaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; @@ -3531,11 +3592,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8), - (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8), + (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; @@ -3668,13 +3729,13 @@ def FltToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } def FltToLongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A))))); } def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); 
} def FltToULongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A))))); } def FltToLong { dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A))))); @@ -3704,13 +3765,13 @@ def DblToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } def DblToIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A))))); } def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } def DblToUIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A))))); } def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); @@ -3834,8 +3895,38 @@ let AddedComplexity = 400 in { def : Pat<DWToSPExtractConv.BVS, (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3), (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>; + def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + + // Elements in a register on a BE system are in order <0, 1, 2, 3>. + // The store instructions store the second word from the left. + // So to align element zero, we need to modulo-left-shift by 3 words. + // Similar logic applies for elements 2 and 3. + foreach Idx = [ [0,3], [2,1], [3,2] ] in { + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + } } + let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in { + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + } + // Big endian, available on all targets with VSX let Predicates = [IsBigEndian, HasVSX] in { def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), @@ -3871,8 +3962,38 @@ let AddedComplexity = 400 in { def : Pat<DWToSPExtractConv.BVS, (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3), (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>; + def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + + // Elements in a register on a LE system are in order <3, 2, 1, 0>. + // The store instructions store the second word from the left. + // So to align element 3, we need to modulo-left-shift by 3 words. + // Similar logic applies for elements 0 and 1. 
+ foreach Idx = [ [0,2], [1,1], [3,3] ] in { + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + } } + let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in { + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + } + let Predicates = [IsLittleEndian, HasVSX] in { // Little endian, available on all targets with VSX def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), @@ -3969,17 +4090,17 @@ let AddedComplexity = 400 in { (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; + (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; + (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (DFLOADf32 ixaddr:$A), + (DFLOADf32 iaddrX4:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (DFLOADf32 ixaddr:$A), + (DFLOADf32 iaddrX4:$A), VSFRC)), 0))>; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 0b57dd9b618d..4d45d96d4479 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -1,9 +1,8 @@ //===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -65,12 +64,6 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars", STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); -namespace llvm { - - void initializePPCLoopPreIncPrepPass(PassRegistry&); - -} // end namespace llvm - namespace { class PPCLoopPreIncPrep : public FunctionPass { @@ -338,7 +331,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // iteration space), insert a new preheader for the loop. 
if (!LoopPredecessor || !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { - LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); + LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); if (LoopPredecessor) MadeChange = true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index e731c0bc0c23..027e6bd1ba06 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -1,9 +1,8 @@ //===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -111,16 +110,16 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PLT; const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + const Module *M = MF->getFunction().getParent(); const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>()); const TargetMachine &TM = Printer.TM; const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); - // -msecure-plt option works only in PIC mode. If secure plt mode - // is on add 32768 to symbol. + // If -msecure-plt -fPIC, add 32768 to symbol. if (Subtarget->isSecurePlt() && TM.isPositionIndependent() && + M->getPICLevel() == PICLevel::BigPIC && MO.getTargetFlags() == PPCII::MO_PLT) - Expr = MCBinaryExpr::createAdd(Expr, - MCConstantExpr::create(32768, Ctx), - Ctx); + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(32768, Ctx), Ctx); if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::createAdd(Expr, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 0068df19f0c8..446246358e96 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -1,9 +1,8 @@ //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// // @@ -22,9 +21,12 @@ #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" +#include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -38,6 +40,7 @@ using namespace llvm; STATISTIC(RemoveTOCSave, "Number of TOC saves removed"); STATISTIC(MultiTOCSaves, "Number of functions with multiple TOC saves that must be kept"); +STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue"); STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); @@ -48,6 +51,10 @@ STATISTIC(NumFunctionsEnteredInMIPeephole, STATISTIC(NumFixedPointIterations, "Number of fixed-point iterations converting reg-reg instructions " "to reg-imm ones"); +STATISTIC(NumRotatesCollapsed, + "Number of pairs of rotate left, clear left/right collapsed"); +STATISTIC(NumEXTSWAndSLDICombined, + "Number of pairs of EXTSW and SLDI combined as EXTSWSLI"); static cl::opt<bool> FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), @@ -83,6 +90,9 @@ struct PPCMIPeephole : public MachineFunctionPass { private: MachineDominatorTree *MDT; + MachinePostDominatorTree *MPDT; + MachineBlockFrequencyInfo *MBFI; + uint64_t EntryFreq; // Initialize class variables. void initialize(MachineFunction &MFParm); @@ -93,6 +103,8 @@ private: // Perform peepholes. 
bool eliminateRedundantCompare(void); bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves); + bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase); + bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI); void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI); @@ -100,7 +112,11 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineBlockFrequencyInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachinePostDominatorTree>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -118,6 +134,9 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; MRI = &MF->getRegInfo(); MDT = &getAnalysis<MachineDominatorTree>(); + MPDT = &getAnalysis<MachinePostDominatorTree>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + EntryFreq = MBFI->getEntryFreq(); TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n"); LLVM_DEBUG(MF->dump()); @@ -198,6 +217,30 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) { void PPCMIPeephole::UpdateTOCSaves( std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) { assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here"); + assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() && + "TOC-save removal only supported on ELFv2"); + PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + + MachineBasicBlock *Entry = &MF->front(); + uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency(); + + // If the block in which the TOC save resides is in a block that + // post-dominates Entry, or a block that is hotter than entry (keep in mind + // that early MachineLICM has already run so the TOC save won't be hoisted) + // we can just do the save in the prologue. + if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry)) + FI->setMustSaveTOC(true); + + // If we are saving the TOC in the prologue, all the TOC saves can be removed + // from the code. + if (FI->mustSaveTOC()) { + for (auto &TOCSave : TOCSaves) + TOCSave.second = false; + // Add new instruction to map. + TOCSaves[MI] = false; + return; + } + bool Keep = true; for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) { MachineInstr *CurrInst = It->first; @@ -758,6 +801,11 @@ bool PPCMIPeephole::simplifyCode(void) { NumOptADDLIs++; break; } + case PPC::RLDICR: { + Simplified |= emitRLDICWhenLoweringJumpTables(MI) || + combineSEXTAndSHL(MI, ToErase); + break; + } } } @@ -771,6 +819,10 @@ bool PPCMIPeephole::simplifyCode(void) { // Eliminate all the TOC save instructions which are redundant. Simplified |= eliminateRedundantTOCSaves(TOCSaves); + PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + if (FI->mustSaveTOC()) + NumTOCSavesInPrologue++; + // We try to eliminate redundant compare instruction. Simplified |= eliminateRedundantCompare(); @@ -1275,10 +1327,136 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { return Simplified; } +// We miss the opportunity to emit an RLDIC when lowering jump tables +// since ISEL sees only a single basic block. When selecting, the clear +// and shift left will be in different blocks. 
+bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { + if (MI.getOpcode() != PPC::RLDICR) + return false; + + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI->getOpcode() != PPC::RLDICL) + return false; + + MachineOperand MOpSHSrc = SrcMI->getOperand(2); + MachineOperand MOpMBSrc = SrcMI->getOperand(3); + MachineOperand MOpSHMI = MI.getOperand(2); + MachineOperand MOpMEMI = MI.getOperand(3); + if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() && + MOpMEMI.isImm())) + return false; + + uint64_t SHSrc = MOpSHSrc.getImm(); + uint64_t MBSrc = MOpMBSrc.getImm(); + uint64_t SHMI = MOpSHMI.getImm(); + uint64_t MEMI = MOpMEMI.getImm(); + uint64_t NewSH = SHSrc + SHMI; + uint64_t NewMB = MBSrc - SHMI; + if (NewMB > 63 || NewSH > 63) + return false; + + // The bits cleared with RLDICL are [0, MBSrc). + // The bits cleared with RLDICR are (MEMI, 63]. + // After the sequence, the bits cleared are: + // [0, MBSrc-SHMI) and (MEMI, 63). + // + // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63]. + if ((63 - NewSH) != MEMI) + return false; + + LLVM_DEBUG(dbgs() << "Converting pair: "); + LLVM_DEBUG(SrcMI->dump()); + LLVM_DEBUG(MI.dump()); + + MI.setDesc(TII->get(PPC::RLDIC)); + MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); + MI.getOperand(2).setImm(NewSH); + MI.getOperand(3).setImm(NewMB); + + LLVM_DEBUG(dbgs() << "To: "); + LLVM_DEBUG(MI.dump()); + NumRotatesCollapsed++; + return true; +} + +// For case in LLVM IR +// entry: +// %iconv = sext i32 %index to i64 +// br i1 undef label %true, label %false +// true: +// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv +// ... +// PPCISelLowering::combineSHL fails to combine, because sext and shl are in +// different BBs when conducting instruction selection. We can do a peephole +// optimization to combine these two instructions into extswsli after +// instruction selection. +bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, + MachineInstr *&ToErase) { + if (MI.getOpcode() != PPC::RLDICR) + return false; + + if (!MF->getSubtarget<PPCSubtarget>().isISA3_0()) + return false; + + assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands"); + + MachineOperand MOpSHMI = MI.getOperand(2); + MachineOperand MOpMEMI = MI.getOperand(3); + if (!(MOpSHMI.isImm() && MOpMEMI.isImm())) + return false; + + uint64_t SHMI = MOpSHMI.getImm(); + uint64_t MEMI = MOpMEMI.getImm(); + if (SHMI + MEMI != 63) + return false; + + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI->getOpcode() != PPC::EXTSW && + SrcMI->getOpcode() != PPC::EXTSW_32_64) + return false; + + // If the register defined by extsw has more than one use, combination is not + // needed. + if (!MRI->hasOneNonDBGUse(SrcReg)) + return false; + + LLVM_DEBUG(dbgs() << "Combining pair: "); + LLVM_DEBUG(SrcMI->dump()); + LLVM_DEBUG(MI.dump()); + + MachineInstr *NewInstr = + BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), + SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI) + : TII->get(PPC::EXTSWSLI_32_64), + MI.getOperand(0).getReg()) + .add(SrcMI->getOperand(1)) + .add(MOpSHMI); + (void)NewInstr; + + LLVM_DEBUG(dbgs() << "TO: "); + LLVM_DEBUG(NewInstr->dump()); + ++NumEXTSWAndSLDICombined; + ToErase = &MI; + // SrcMI, which is extsw, is of no use now, erase it. 
+ SrcMI->eraseFromParent(); + return true; +} + } // end default namespace INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE, "PowerPC MI Peephole Optimization", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE, "PowerPC MI Peephole Optimization", false, false) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 3923417257e8..2f65d6a2855b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -1,9 +1,8 @@ //===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 8a3f50aa9565..dfae19804d94 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -1,9 +1,8 @@ //===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -45,6 +44,12 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// MustSaveTOC - Indicates that the TOC save needs to be performed in the + /// prologue of the function. This is typically the case when there are + /// indirect calls in the function and it is more profitable to save the + /// TOC pointer in the prologue than in the block(s) containing the call(s). + bool MustSaveTOC = false; + /// Do we have to disable shrink-wrapping? This has to be set if we emit any /// instructions that clobber LR in the entry block because discovering this /// in PEI is too late (happens after shrink-wrapping); @@ -152,6 +157,9 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setMustSaveTOC(bool U) { MustSaveTOC = U; } + bool mustSaveTOC() const { return MustSaveTOC; } + /// We certainly don't want to shrink wrap functions if we've emitted a /// MovePCtoLR8 as that has to go into the entry, so the prologue definitely /// has to go into the entry block. 
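The mask arithmetic behind the RLDICL + RLDICR -> RLDIC collapse in emitRLDICWhenLoweringJumpTables above is easy to sanity-check outside the compiler. The following is a minimal standalone sketch, not part of the patch: the SHSrc/MBSrc/SHMI/MEMI constants are hypothetical stand-ins for the jump-table shape the pass targets (clear the high bits left by an extension, then shift left to scale the index). Recall that PowerPC numbers bits from the most significant end, so mask(MB, ME) sets PPC bits MB..ME.

    #include <cassert>
    #include <cstdint>

    // Rotate left in a 64-bit register; SH == 0 must be a no-op.
    static uint64_t rotl64(uint64_t V, unsigned SH) {
      return SH ? (V << SH) | (V >> (64 - SH)) : V;
    }

    // PPC bit 0 is the MSB, so mask(MB, 63) clears the MB high-order bits
    // and mask(0, ME) clears the 63 - ME low-order bits.
    static uint64_t maskFrom(unsigned MB) { return ~0ULL >> MB; }
    static uint64_t maskTo(unsigned ME) { return ~0ULL << (63 - ME); }

    int main() {
      uint64_t X = 0x123456789abcdef0ULL; // arbitrary input value

      // Hypothetical jump-table shape: rldicl 0,32 (zero-extend a word),
      // then rldicr 2,61 (scale the index by 4, clearing the low 2 bits).
      unsigned SHSrc = 0, MBSrc = 32, SHMI = 2, MEMI = 61;

      uint64_t AfterRLDICL = rotl64(X, SHSrc) & maskFrom(MBSrc);
      uint64_t AfterRLDICR = rotl64(AfterRLDICL, SHMI) & maskTo(MEMI);

      unsigned NewSH = SHSrc + SHMI;
      unsigned NewMB = MBSrc - SHMI;
      assert(NewSH <= 63 && NewMB <= 63 && 63 - NewSH == MEMI);

      // RLDIC NewSH, NewMB keeps PPC bits [NewMB, 63 - NewSH] of the
      // rotated value, clearing the high NewMB and low NewSH bits.
      uint64_t AfterRLDIC = rotl64(X, NewSH) & maskFrom(NewMB) & maskTo(63 - NewSH);

      assert(AfterRLDICR == AfterRLDIC); // the pair collapses to one rotate
      return 0;
    }

The asserts mirror the legality checks in the pass: NewSH and NewMB must stay within [0, 63], and the pair folds only when (63 - NewSH) == MEMI, i.e. when the RLDICR keep-range ends exactly where the low bits cleared by the combined rotate begin.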
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
new file mode 100644
index 000000000000..a38c8f475066
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineScheduler.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
+ cl::desc("Disable scheduling addi instruction before "
+ "load for ppc"), cl::Hidden);
+
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone) const {
+ if (DisableAddiLoadHeuristic)
+ return false;
+
+ auto isADDIInstr = [&] (const MachineInstr &Inst) {
+ return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
+ };
+
+ SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
+ SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
+ if (isADDIInstr(*FirstCand.SU->getInstr()) &&
+ SecondCand.SU->getInstr()->mayLoad()) {
+ TryCand.Reason = Stall;
+ return true;
+ }
+ if (FirstCand.SU->getInstr()->mayLoad() &&
+ isADDIInstr(*SecondCand.SU->getInstr())) {
+ TryCand.Reason = NoCand;
+ return true;
+ }
+
+ return false;
+}
+
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+ if (!Cand.isValid() || !Zone)
+ return;
+
+ // Add powerpc specific heuristic only when TryCand isn't selected or
+ // selected as node order.
+ if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+ return;
+
+ // There are some benefits to schedule the ADDI before the load to hide the
+ // latency, as RA may create a true dependency between the load and addi.
+ if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+ return;
+}
+
+void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
+ // Custom PPC PostRA specific behavior here.
+ PostGenericScheduler::enterMBB(MBB);
+}
+
+void PPCPostRASchedStrategy::leaveMBB() {
+ // Custom PPC PostRA specific behavior here.
+ PostGenericScheduler::leaveMBB();
+}
+
+void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
+ // Custom PPC PostRA specific initialization here.
+ PostGenericScheduler::initialize(Dag);
+}
+
+SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
+ // Custom PPC PostRA specific scheduling here.
+ return PostGenericScheduler::pickNode(IsTopNode);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
new file mode 100644
index 000000000000..93532d9545a6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -0,0 +1,49 @@
+//===- PPCMachineScheduler.h - Custom PowerPC MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Custom PowerPC MI scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// A MachineSchedStrategy implementation for PowerPC pre RA scheduling. +class PPCPreRASchedStrategy : public GenericScheduler { +public: + PPCPreRASchedStrategy(const MachineSchedContext *C) : + GenericScheduler(C) {} +protected: + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const override; +private: + bool biasAddiLoadCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone) const; +}; + +/// A MachineSchedStrategy implementation for PowerPC post RA scheduling. +class PPCPostRASchedStrategy : public PostGenericScheduler { +public: + PPCPostRASchedStrategy(const MachineSchedContext *C) : + PostGenericScheduler(C) {} + +protected: + void initialize(ScheduleDAGMI *Dag) override; + SUnit *pickNode(bool &IsTopNode) override; + void enterMBB(MachineBasicBlock *MBB) override; + void leaveMBB() override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h index 8a1d68011c5f..d0d84efdbd20 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h @@ -1,9 +1,8 @@ //===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td index d2a09f30c0f3..20b9efdc9df9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td @@ -1,9 +1,8 @@ //===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index 4458b92ceb5e..d83c92276800 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -1,9 +1,8 @@ //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp index 25b2b54cbe98..3a83cc27439c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -1,9 +1,8 @@ //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,10 +30,6 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of QPX load splats simplified"); -namespace llvm { - void initializePPCQPXLoadSplatPass(PassRegistry&); -} - namespace { struct PPCQPXLoadSplat : public MachineFunctionPass { static char ID; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 173fc18b9ebf..8eaa6dfe2bf7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -1,9 +1,8 @@ //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// // @@ -49,10 +48,6 @@ STATISTIC(NumNotSplitChainCopies, STATISTIC(NumNotSplitWrongOpcode, "Number of blocks not split due to the wrong opcode."); -namespace llvm { - void initializePPCReduceCRLogicalsPass(PassRegistry&); -} - /// Given a basic block \p Successor that potentially contains PHIs, this /// function will look for any incoming values in the PHIs that are supposed to /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB. @@ -171,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) { : *ThisMBB->succ_begin(); MachineBasicBlock *NewBRTarget = BSI.BranchToFallThrough ? 
OrigFallThrough : OrigTarget;
- BranchProbability ProbToNewTarget =
- !BSI.MBPI ? BranchProbability::getUnknown()
- : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+ // It's impossible to know the precise branch probability after the split.
+ // But it still needs to be reasonable; the whole probability to the original
+ // targets should not be changed.
+ // After the split, NewBRTarget will get two incoming edges. Assume P0 is the
+ // original branch probability to NewBRTarget, P1 and P2 are new branch
+ // probabilities to NewBRTarget after split. If the two edge frequencies are
+ // the same, then
+ // F * P1 = F * P0 / 2 ==> P1 = P0 / 2
+ // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
+ BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
+ BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
+ ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+ ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+ if (BSI.MBPI) {
+ if (BSI.BranchToFallThrough) {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbOrigFallThrough.getCompl();
+ } else {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbOrigTarget.getCompl();
+ }
+ }

 // Create a new basic block.
 MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -185,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
 // Move everything after SplitBefore into the new block.
 NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
 NewMBB->transferSuccessors(ThisMBB);
+ if (!ProbOrigTarget.isUnknown()) {
+ auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+ NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+ MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+ NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+ }

- // Add the two successors to ThisMBB. The probabilities come from the
- // existing blocks if available.
+ // Add the two successors to ThisMBB.
 ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
- ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+ ThisMBB->addSuccessor(NewMBB, ProbFallThrough);

 // Add the branches to ThisMBB.
 BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3d067aa8e621..12554ea8d079 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -13,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "PPCRegisterInfo.h" -#include "PPC.h" #include "PPCFrameLowering.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" @@ -71,6 +69,14 @@ StackPtrConst("ppc-stack-ptr-caller-preserved", "caller preserved registers can be LICM candidates"), cl::init(true), cl::Hidden); +static cl::opt<unsigned> +MaxCRBitSpillDist("ppc-max-crbit-spill-dist", + cl::desc("Maximum search distance for definition of CR bit " + "spill on ppc"), + cl::Hidden, cl::init(100)); + +static unsigned offsetMinAlignForOpcode(unsigned OpC); + PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR, TM.isPPC64() ? 0 : 1, @@ -153,30 +159,39 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR()) return CSR_SRV464_TLS_PE_SaveList; - if (Subtarget.hasSPE()) - return CSR_SVR432_SPE_SaveList; - // On PPC64, we might need to save r2 (but only if it is not reserved). bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); + // Cold calling convention CSRs. if (MF->getFunction().getCallingConv() == CallingConv::Cold) { - return TM.isPPC64() - ? (Subtarget.hasAltivec() - ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList - : CSR_SVR64_ColdCC_Altivec_SaveList) - : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList - : CSR_SVR64_ColdCC_SaveList)) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList - : CSR_SVR32_ColdCC_SaveList); + if (TM.isPPC64()) { + if (Subtarget.hasAltivec()) + return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList + : CSR_SVR64_ColdCC_Altivec_SaveList; + return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList + : CSR_SVR64_ColdCC_SaveList; + } + // 32-bit targets. + if (Subtarget.hasAltivec()) + return CSR_SVR32_ColdCC_Altivec_SaveList; + else if (Subtarget.hasSPE()) + return CSR_SVR32_ColdCC_SPE_SaveList; + return CSR_SVR32_ColdCC_SaveList; } - - return TM.isPPC64() - ? (Subtarget.hasAltivec() - ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList - : CSR_SVR464_Altivec_SaveList) - : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList)) - : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList - : CSR_SVR432_SaveList); + // Standard calling convention CSRs. + if (TM.isPPC64()) { + if (Subtarget.hasAltivec()) + return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList + : CSR_SVR464_Altivec_SaveList; + return SaveR2 ? CSR_SVR464_R2_SaveList + : CSR_SVR464_SaveList; + } + // 32-bit targets. + if (Subtarget.hasAltivec()) + return CSR_SVR432_Altivec_SaveList; + else if (Subtarget.hasSPE()) + return CSR_SVR432_SPE_SaveList; + return CSR_SVR432_SaveList; } const MCPhysReg * @@ -221,18 +236,26 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, : CSR_Darwin64_RegMask) : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask : CSR_Darwin32_RegMask); + if (Subtarget.isAIXABI()) { + assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet."); + return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask; + } if (CC == CallingConv::Cold) { return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask : CSR_SVR64_ColdCC_RegMask) : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : CSR_SVR32_ColdCC_RegMask); + : (Subtarget.hasSPE() + ? 
CSR_SVR32_ColdCC_SPE_RegMask
+ : CSR_SVR32_ColdCC_RegMask));
 }
 return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
 : CSR_SVR464_RegMask)
 : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
- : CSR_SVR432_RegMask);
+ : (Subtarget.hasSPE()
+ ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
 }

 const uint32_t*
@@ -288,6 +311,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
 }

+ // Always reserve r2 on AIX for now.
+ // TODO: Make r2 allocatable on AIX/XCOFF for some leaf functions.
+ if (Subtarget.isAIXABI())
+ markSuperRegs(Reserved, PPC::R2); // System-reserved register
+
 // On PPC64, r13 is the thread pointer. Never allocate this register.
 if (TM.isPPC64())
 markSuperRegs(Reserved, PPC::R13);
@@ -316,6 +344,51 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 return Reserved;
 }

+bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCInstrInfo *InstrInfo = Subtarget.getInstrInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+
+ // If the callee saved info is invalid, we have to default to true for safety.
+ if (!MFI.isCalleeSavedInfoValid())
+ return true;
+
+ // We will require the use of X-Forms because the frame is larger than what
+ // can be represented in signed 16 bits that fit in the immediate of a D-Form.
+ // If we need an X-Form then we need a register to store the address offset.
+ unsigned FrameSize = MFI.getStackSize();
+ // Signed 16 bits means that the FrameSize cannot be more than 15 bits.
+ if (FrameSize & ~0x7FFF)
+ return true;
+
+ // The callee saved info is valid so it can be traversed.
+ // Checking for registers that need saving that do not have load or store
+ // forms where the address offset is an immediate.
+ for (unsigned i = 0; i < Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ unsigned Reg = Info[i].getReg();
+
+ unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+ if (!MFI.isFixedObjectIndex(FrIdx)) {
+ // This is not a fixed object. If it requires alignment then we may still
+ // need to use the XForm.
+ if (offsetMinAlignForOpcode(Opcode) > 1)
+ return true;
+ }
+
+ // This is either:
+ // 1) A fixed frame index object which we know is aligned, so
+ // as long as we have a valid DForm/DSForm/DQForm (non XForm) we don't
+ // need to consider the alignment here.
+ // 2) A non-fixed object, but in that case we now know that the min required
+ // alignment is no more than 1 based on the previous check.
+ if (InstrInfo->isXFormMemOp(Opcode))
+ return true;
+ }
+ return false;
+}
+
 bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
 const MachineFunction &MF) const {
 assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
@@ -664,6 +737,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
 MachineFunction &MF = *MBB.getParent();
 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const TargetRegisterInfo* TRI = Subtarget.getRegisterInfo();
 DebugLoc dl = MI.getDebugLoc();
 bool LP64 = TM.isPPC64();
@@ -673,27 +747,59 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
 unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? 
G8RC : GPRC); unsigned SrcReg = MI.getOperand(0).getReg(); - // We need to move the CR field that contains the CR bit we are spilling. - // The super register may not be explicitly defined (i.e. it can be defined - // by a CR-logical that only defines the subreg) so we state that the CR - // field is undef. Also, in order to preserve the kill flag on the CR bit, - // we add it as an implicit use. - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) + // Search up the BB to find the definition of the CR bit. + MachineBasicBlock::reverse_iterator Ins; + unsigned CRBitSpillDistance = 0; + for (Ins = MI; Ins != MBB.rend(); Ins++) { + // Definition found. + if (Ins->modifiesRegister(SrcReg, TRI)) + break; + // Unable to find CR bit definition within maximum search distance. + if (CRBitSpillDistance == MaxCRBitSpillDist) { + Ins = MI; + break; + } + // Skip debug instructions when counting CR bit spill distance. + if (!Ins->isDebugInstr()) + CRBitSpillDistance++; + } + + // Unable to find the definition of the CR bit in the MBB. + if (Ins == MBB.rend()) + Ins = MI; + + // There is no need to extract the CR bit if its value is already known. + switch (Ins->getOpcode()) { + case PPC::CRUNSET: + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg) + .addImm(0); + break; + case PPC::CRSET: + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg) + .addImm(-32768); + break; + default: + // We need to move the CR field that contains the CR bit we are spilling. + // The super register may not be explicitly defined (i.e. it can be defined + // by a CR-logical that only defines the subreg) so we state that the CR + // field is undef. Also, in order to preserve the kill flag on the CR bit, + // we add it as an implicit use. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) .addReg(getCRFromCRBit(SrcReg), RegState::Undef) .addReg(SrcReg, RegState::Implicit | getKillRegState(MI.getOperand(0).isKill())); - // If the saved register wasn't CR0LT, shift the bits left so that the bit to - // store is the first one. Mask all but that bit. - unsigned Reg1 = Reg; - Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - - // rlwinm rA, rA, ShiftBits, 0, 0. - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg1, RegState::Kill) - .addImm(getEncodingValue(SrcReg)) - .addImm(0).addImm(0); + // If the saved register wasn't CR0LT, shift the bits left so that the bit + // to store is the first one. Mask all but that bit. + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + // rlwinm rA, rA, ShiftBits, 0, 0. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg)) + .addImm(0).addImm(0); + } addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) .addReg(Reg, RegState::Kill), FrameIndex); @@ -826,9 +932,7 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, } // If the offset must be a multiple of some value, return what that value is. 
-static unsigned offsetMinAlign(const MachineInstr &MI) { - unsigned OpC = MI.getOpcode(); - +static unsigned offsetMinAlignForOpcode(unsigned OpC) { switch (OpC) { default: return 1; @@ -847,12 +951,21 @@ static unsigned offsetMinAlign(const MachineInstr &MI) { case PPC::STXSD: case PPC::STXSSP: return 4; + case PPC::EVLDD: + case PPC::EVSTDD: + return 8; case PPC::LXV: case PPC::STXV: return 16; } } +// If the offset must be a multiple of some value, return what that value is. +static unsigned offsetMinAlign(const MachineInstr &MI) { + unsigned OpC = MI.getOpcode(); + return offsetMinAlignForOpcode(OpC); +} + // Return the OffsetOperandNo given the FIOperandNum (and the instruction). static unsigned getOffsetONFromFION(const MachineInstr &MI, unsigned FIOperandNum) { @@ -963,7 +1076,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // happen in invalid code. assert(OpC != PPC::DBG_VALUE && "This should be handled in a target-independent way"); - if (!noImmForm && ((isInt<16>(Offset) && + bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ? + isUInt<8>(Offset) : + isInt<16>(Offset); + if (!noImmForm && ((OffsetFitsMnemonic && ((Offset % offsetMinAlign(MI)) == 0)) || OpC == TargetOpcode::STACKMAP || OpC == TargetOpcode::PATCHPOINT)) { @@ -1001,7 +1117,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (noImmForm) OperandBase = 1; - else if (OpC != TargetOpcode::INLINEASM) { + else if (OpC != TargetOpcode::INLINEASM && + OpC != TargetOpcode::INLINEASM_BR) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; @@ -1016,7 +1133,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } -unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { +Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const PPCFrameLowering *TFI = getFrameLowering(MF); if (!TM.isPPC64()) @@ -1025,7 +1142,7 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return TFI->hasFP(MF) ? PPC::X31 : PPC::X1; } -unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { +Register PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (!hasBasePointer(MF)) return getFrameRegister(MF); @@ -1080,7 +1197,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); const PPCFrameLowering *TFI = getFrameLowering(MF); - unsigned StackEst = TFI->determineFrameLayout(MF, false, true); + unsigned StackEst = TFI->determineFrameLayout(MF, true); // If we likely don't need a stack frame, then we probably don't need a // virtual base register either. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index e93fe4ce3453..a50e05920cd4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -1,9 +1,8 @@ //===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,13 +14,14 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H #define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H -#include "PPC.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/DenseMap.h" #define GET_REGINFO_HEADER #include "PPCGenRegisterInfo.inc" namespace llvm { +class PPCTargetMachine; inline static unsigned getCRFromCRBit(unsigned SrcReg) { unsigned Reg = 0; @@ -90,9 +90,7 @@ public: return true; } - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; @@ -134,10 +132,10 @@ public: int64_t Offset) const override; // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const override; + Register getFrameRegister(const MachineFunction &MF) const override; // Base pointer (stack realignment) support. - unsigned getBaseRegister(const MachineFunction &MF) const; + Register getBaseRegister(const MachineFunction &MF) const; bool hasBasePointer(const MachineFunction &MF) const; /// stripRegisterPrefix - This method strips the character prefix from a diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index d0d29b6d2c7d..af0dff6347a6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -1,9 +1,8 @@ //===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -375,8 +374,6 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32, def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, CR7, CR2, CR3, CR4)>; -def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>; - // The CTR registers are not allocatable because they're used by the // decrement-and-branch instructions, and thus need to stay live across // multiple basic blocks. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td index c8fe7d7eea78..4fa29d96ca14 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -1,9 +1,8 @@ //===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -106,6 +105,7 @@ def IIC_VecVSL : InstrItinClass; def IIC_VecVSR : InstrItinClass; def IIC_SprMTMSRD : InstrItinClass; def IIC_SprSLIE : InstrItinClass; +def IIC_SprSLBFEE : InstrItinClass; def IIC_SprSLBIE : InstrItinClass; def IIC_SprSLBIEG : InstrItinClass; def IIC_SprSLBMTE : InstrItinClass; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td index 646822eedbe0..708261fc7cc8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td @@ -1,9 +1,8 @@ //===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index f34c1accc0fd..c2b298524e00 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -1,9 +1,8 @@ //===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td index 479a970b2537..74744dda54f7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td @@ -1,9 +1,8 @@ //===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td index d8bda073833f..1a1c041565b6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -1,9 +1,8 @@ //===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td index 3e50803955c4..4480d7fba4fb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -1,9 +1,8 @@ //===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td index 0995b7200d93..8f1907f2c016 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td @@ -1,9 +1,8 @@ //===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td index 1b15c7b3c7ad..0eabc49d7841 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td @@ -1,9 +1,8 @@ //===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td index 0044c3c6a449..9c84aec638d7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -1,9 +1,8 @@ //===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td index c802b80170fb..087073537796 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -1,9 +1,8 @@ //===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td index 1d6e509819da..5a8c1eb2b837 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -1,9 +1,8 @@ //===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td index ff39dfda7016..70a58f42a98a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -1,9 +1,8 @@ //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td index a1e625c855e0..6a79cca89194 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -1,9 +1,8 @@ //===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -51,8 +50,21 @@ let SchedModel = P9Model in { // ***************** Processor Resources ***************** - //Dispatcher: - def DISPATCHER : ProcResource<12>; + // Dispatcher slots: + // x0, x1, x2, and x3 are the dedicated slice dispatch ports, where each + // corresponds to one of the four execution slices. + def DISPx02 : ProcResource<2>; + def DISPx13 : ProcResource<2>; + // The xa and xb ports can be used to send an iop to either of the two slices + // of the superslice, but are restricted to iops with only two primary sources. + def DISPxab : ProcResource<2>; + // b0 and b1 are dedicated dispatch ports into the branch slice. + def DISPb01 : ProcResource<2>; + + // Any non BR dispatch ports + def DISP_NBR + : ProcResGroup<[ DISPx02, DISPx13, DISPxab]>; + def DISP_SS : ProcResGroup<[ DISPx02, DISPx13]>; // Issue Ports // An instruction can go down one of two issue queues. @@ -117,8 +129,37 @@ let SchedModel = P9Model in { // ***************** SchedWriteRes Definitions ***************** - //Dispatcher - def DISP_1C : SchedWriteRes<[DISPATCHER]> { + // Dispatcher + // Dispatch Rules: '-' or 'V' + // Vector ('V') - vector iops (128-bit operand) take only one decode and + // dispatch slot but are dispatched to both the even and odd slices of a + // superslice. + def DISP_1C : SchedWriteRes<[DISP_NBR]> { + let NumMicroOps = 0; + let Latency = 1; + } + // Dispatch Rules: 'E' + // Even slice ('E')- certain operations must be sent only to an even slice. + // Also consumes odd dispatch slice slot of the same superslice at dispatch + def DISP_EVEN_1C : SchedWriteRes<[ DISPx02, DISPx13 ]> { + let NumMicroOps = 0; + let Latency = 1; + } + // Dispatch Rules: 'P' + // Paired ('P') - certain cracked and expanded iops are paired such that they + // must dispatch together to the same superslice. + def DISP_PAIR_1C : SchedWriteRes<[ DISP_SS, DISP_SS]> { + let NumMicroOps = 0; + let Latency = 1; + } + // Tuple Restricted ('R') - certain iops preclude dispatching more than one + // operation per slice for the super- slice to which they are dispatched + def DISP_3SLOTS_1C : SchedWriteRes<[DISPx02, DISPx13, DISPxab]> { + let NumMicroOps = 0; + let Latency = 1; + } + // Each execution and branch slice can receive up to two iops per cycle + def DISP_BR_1C : SchedWriteRes<[ DISPxab ]> { let NumMicroOps = 0; let Latency = 1; } @@ -148,7 +189,7 @@ let SchedModel = P9Model in { // ALU Units // An ALU may take either 2 or 3 cycles to complete the operation. - // However, the ALU unit is only every busy for 1 cycle at a time and may + // However, the ALU unit is only ever busy for 1 cycle at a time and may // receive new instructions each cycle. def P9_ALU_2C : SchedWriteRes<[ALU]> { let Latency = 2; @@ -203,10 +244,6 @@ let SchedModel = P9Model in { // DP Unit // A DP unit may take from 2 to 36 cycles to complete. // Some DP operations keep the unit busy for up to 10 cycles. 
- def P9_DP_2C : SchedWriteRes<[DP]> { - let Latency = 2; - } - def P9_DP_5C : SchedWriteRes<[DP]> { let Latency = 5; } @@ -228,11 +265,6 @@ let SchedModel = P9Model in { let Latency = 22; } - def P9_DP_24C_8 : SchedWriteRes<[DP]> { - let ResourceCycles = [8]; - let Latency = 24; - } - def P9_DPO_24C_8 : SchedWriteRes<[DPO]> { let ResourceCycles = [8]; let Latency = 24; @@ -248,11 +280,6 @@ let SchedModel = P9Model in { let Latency = 22; } - def P9_DP_27C_7 : SchedWriteRes<[DP]> { - let ResourceCycles = [7]; - let Latency = 27; - } - def P9_DPE_27C_10 : SchedWriteRes<[DP]> { let ResourceCycles = [10]; let Latency = 27; @@ -383,16 +410,12 @@ let SchedModel = P9Model in { def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>; def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>; def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>; - def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>; def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>; def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>; def P9_ALUOpAndALUOpAndALUOp_6C : WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>; def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>; - def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>; def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>; - def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>; - def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>; def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>; def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>; def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 57244ddff552..6239decf1539 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -1,9 +1,8 @@ //===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
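A note on the deletions above: the P9 SchedWriteRes and WriteSequence names encode their end-to-end latency, so P9_DPOpAndALU2Op_10C chains the 7-cycle P9_DP_7C into the 3-cycle P9_ALU_3C for 10 cycles total, and the dropped P9_DPOpAndALUOp_24C_5 was P9_DP_22C_5 followed by the 2-cycle ALU write. The removed definitions (P9_DP_2C, P9_DP_24C_8, P9_DP_27C_7, P9_StoreAndALUOp_4C, P9_DPOpAndALUOp_9C, P9_DPOpAndALUOp_24C_5, P9_DPOpAndALUOp_35C_8) were presumably left without users once the P9 instruction mappings were regenerated.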
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -40,6 +39,11 @@ static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned", cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"), cl::Hidden); +static cl::opt<bool> + EnableMachinePipeliner("ppc-enable-pipeliner", + cl::desc("Enable Machine Pipeliner for PPC"), + cl::init(false), cl::Hidden); + PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); @@ -68,6 +72,7 @@ void PPCSubtarget::initializeEnvironment() { HasFPU = false; HasQPX = false; HasVSX = false; + NeedsTwoConstNR = false; HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; @@ -103,11 +108,13 @@ void PPCSubtarget::initializeEnvironment() { HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; - HasFusion = false; HasFloat128 = false; IsISA3_0 = false; UseLongCalls = false; SecurePlt = false; + VectorsUseTwoUnits = false; + UsePPCPreRASchedStrategy = false; + UsePPCPostRASchedStrategy = false; HasPOPCNTD = POPCNTD_Unavailable; } @@ -138,8 +145,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isDarwin()) HasLazyResolverStubs = true; - if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) - || TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD()) + if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) || + TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || + TargetTriple.isMusl()) SecurePlt = true; if (HasSPE && IsPPC64) @@ -179,10 +187,14 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { return false; } -bool PPCSubtarget::enableMachineScheduler() const { - return true; +bool PPCSubtarget::enableMachineScheduler() const { return true; } + +bool PPCSubtarget::enableMachinePipeliner() const { + return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner; } +bool PPCSubtarget::useDFAforSMS() const { return false; } + // This overrides the PostRAScheduler bit in the SchedModel for each CPU. bool PPCSubtarget::enablePostRAScheduler() const { return true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index c56f254d6bec..55fec1cb6d99 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -1,9 +1,8 @@ //===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
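Note the double gate in enableMachinePipeliner() above: the subtarget must report the POWER9 directive and the hidden, default-off flag must be given, so experimenting looks roughly like `llc -mcpu=pwr9 -ppc-enable-pipeliner` (illustrative invocation; any other -mcpu leaves the hook false). useDFAforSMS() returning false means swing modulo scheduling tracks resources through the ProcResource-based machine model rather than a DFA packetizer.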
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -99,6 +98,7 @@ protected: bool HasSPE; bool HasQPX; bool HasVSX; + bool NeedsTwoConstNR; bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; @@ -131,11 +131,13 @@ protected: bool HasPartwordAtomics; bool HasDirectMove; bool HasHTM; - bool HasFusion; bool HasFloat128; bool IsISA3_0; bool UseLongCalls; bool SecurePlt; + bool VectorsUseTwoUnits; + bool UsePPCPreRASchedStrategy; + bool UsePPCPostRASchedStrategy; POPCNTDKind HasPOPCNTD; @@ -244,6 +246,7 @@ public: bool hasFPU() const { return HasFPU; } bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } + bool needsTwoConstNR() const { return NeedsTwoConstNR; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } @@ -260,6 +263,7 @@ public: bool isPPC4xx() const { return IsPPC4xx; } bool isPPC6xx() const { return IsPPC6xx; } bool isSecurePlt() const {return SecurePlt; } + bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } bool isDeprecatedDST() const { return DeprecatedDST; } @@ -267,6 +271,8 @@ public: bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } + bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; } + bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool hasDirectMove() const { return HasDirectMove; } @@ -285,7 +291,6 @@ public: } bool hasHTM() const { return HasHTM; } - bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } bool isISA3_0() const { return IsISA3_0; } bool useLongCalls() const { return UseLongCalls; } @@ -307,16 +312,21 @@ public: bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isDarwinABI() const { return isTargetMachO() || isDarwin(); } - bool isSVR4ABI() const { return !isDarwinABI(); } + bool isAIXABI() const { return TargetTriple.isOSAIX(); } + bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); } bool isELFv2ABI() const; /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. bool enableEarlyIfConversion() const override { return true; } - // Scheduling customization. + /// Scheduling customization. bool enableMachineScheduler() const override; - // This overrides the PostRAScheduler bit in the SchedModel for each CPU. + /// Pipeliner customization. + bool enableMachinePipeliner() const override; + /// Machine Pipeliner customization + bool useDFAforSMS() const override; + /// This overrides the PostRAScheduler bit in the SchedModel for each CPU. 
bool enablePostRAScheduler() const override; AntiDepBreakMode getAntiDepBreakMode() const override; void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index ac36abbe8439..fb826c4a32f1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -1,9 +1,8 @@ //===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -35,10 +34,6 @@ using namespace llvm; #define DEBUG_TYPE "ppc-tls-dynamic-call" -namespace llvm { - void initializePPCTLSDynamicCallPass(PassRegistry&); -} - namespace { struct PPCTLSDynamicCall : public MachineFunctionPass { static char ID; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 17345b6ca8d3..3eb0569fb955 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -1,9 +1,8 @@ //===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -83,10 +82,6 @@ using namespace llvm; #define DEBUG_TYPE "ppc-toc-reg-deps" -namespace llvm { - void initializePPCTOCRegDepsPass(PassRegistry&); -} - namespace { // PPCTOCRegDeps pass - For simple functions without epilogue code, move // returns up, and create conditional returns, to avoid unnecessary diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 580d057602f5..ce00f848dd72 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -1,9 +1,8 @@ //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
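The namespace blocks removed from PPCTLSDynamicCall.cpp and PPCTOCRegDeps.cpp (and from the VSX passes further down) do not lose anything: each file had forward-declared its own initializePPC*Pass(PassRegistry &), and those declarations now presumably live in a single shared header so that LLVMInitializePowerPCTarget can register all of the passes up front, as the PPCTargetMachine.cpp hunk below shows.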
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -14,9 +13,11 @@ #include "PPCTargetMachine.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" +#include "PPCMachineScheduler.h" #include "PPCSubtarget.h" #include "PPCTargetObjectFile.h" #include "PPCTargetTransformInfo.h" +#include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -100,6 +101,19 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); +#ifndef NDEBUG + initializePPCCTRLoopsVerifyPass(PR); +#endif + initializePPCLoopPreIncPrepPass(PR); + initializePPCTOCRegDepsPass(PR); + initializePPCEarlyReturnPass(PR); + initializePPCVSXCopyPass(PR); + initializePPCVSXFMAMutatePass(PR); + initializePPCVSXSwapRemovalPass(PR); + initializePPCReduceCRLogicalsPass(PR); + initializePPCBSelPass(PR); + initializePPCBranchCoalescingPass(PR); + initializePPCQPXLoadSplatPass(PR); initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); initializePPCPreEmitPeepholePass(PR); @@ -199,6 +213,8 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, case Triple::ppc64le: return PPCTargetMachine::PPC_ABI_ELFv2; case Triple::ppc64: + if (TT.getEnvironment() == llvm::Triple::ELFv2) + return PPCTargetMachine::PPC_ABI_ELFv2; return PPCTargetMachine::PPC_ABI_ELFv1; default: return PPCTargetMachine::PPC_ABI_UNKNOWN; @@ -227,9 +243,9 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, bool JIT) { if (CM) { if (*CM == CodeModel::Tiny) - report_fatal_error("Target does not support the tiny CodeModel"); + report_fatal_error("Target does not support the tiny CodeModel", false); if (*CM == CodeModel::Kernel) - report_fatal_error("Target does not support the kernel CodeModel"); + report_fatal_error("Target does not support the kernel CodeModel", false); return *CM; } if (!TT.isOSDarwin() && !JIT && @@ -238,6 +254,29 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, return CodeModel::Small; } + +static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { + const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); + ScheduleDAGMILive *DAG = + new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? + llvm::make_unique<PPCPreRASchedStrategy>(C) : + llvm::make_unique<GenericScheduler>(C)); + // add DAG Mutations here. + DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); + return DAG; +} + +static ScheduleDAGInstrs *createPPCPostMachineScheduler( + MachineSchedContext *C) { + const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); + ScheduleDAGMI *DAG = + new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? + llvm::make_unique<PPCPostRASchedStrategy>(C) : + llvm::make_unique<PostGenericScheduler>(C), true); + // add DAG Mutations here. + return DAG; +} + // The FeatureString here is a little subtle. 
We are modifying the feature // string with what are (currently) non-function specific overrides as it goes // into the LLVMTargetMachine constructor and then using the stored value in the @@ -331,6 +370,14 @@ public: void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + return createPPCMachineScheduler(C); + } + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + return createPPCPostMachineScheduler(C); + } }; } // end anonymous namespace @@ -374,7 +421,7 @@ bool PPCPassConfig::addPreISel() { addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoops()); + addPass(createHardwareLoopsPass()); return false; } @@ -441,6 +488,9 @@ void PPCPassConfig::addPreRegAlloc() { } if (EnableExtraTOCRegDeps) addPass(createPPCTOCRegDepsPass()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(&MachinePipelinerID); } void PPCPassConfig::addPreSched2() { @@ -469,3 +519,13 @@ TargetTransformInfo PPCTargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); } + +static MachineSchedRegistry +PPCPreRASchedRegistry("ppc-prera", + "Run PowerPC PreRA specific scheduler", + createPPCMachineScheduler); + +static MachineSchedRegistry +PPCPostRASchedRegistry("ppc-postra", + "Run PowerPC PostRA specific scheduler", + createPPCPostMachineScheduler); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index 75b98a815ab4..fd1d14ae32d4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -1,9 +1,8 @@ //===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -59,10 +58,6 @@ public: const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; - - bool isMachineVerifierClean() const override { - return false; - } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp index a049dc3fda93..e237fab1b267 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -1,9 +1,8 @@ //===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
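Besides being wired in through the createMachineScheduler/createPostMachineScheduler overrides above, the two strategies are registered by name, so the pre-RA one can also be requested explicitly with `llc -misched=ppc-prera` (the generic MachineSchedRegistry selector) even when UsePPCPreRASchedStrategy is off. Note also that addPreISel now emits the target-independent HardwareLoops pass in place of createPPCCTRLoops; the profitability decision moves to the TTI hooks added in PPCTargetTransformInfo.cpp below.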
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h index 417b8ed0d612..78a5840c87c7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -1,9 +1,8 @@ //===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h index 310fea9ef09f..e17361d997fd 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h @@ -1,9 +1,8 @@ //===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index bc9bcab83a0a..ff3dfbfaca05 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1,17 +1,18 @@ //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "PPCTargetTransformInfo.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -32,6 +33,13 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions")); +// The latency of mtctr is only justified if there are more than 4 +// comparisons that will be removed as a result. 
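Concretely, with the default threshold of 4 defined just below, isHardwareLoopProfitable (later in this file) keeps a loop in compare-and-branch form when its constant trip count is under 4 and its body fits within about six cycles of issue; at an issue width of 8, as in the P9 model, that is 6 * 8 = 48 instructions, matching the ~6-cycle mtctr latency estimate used there.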
+static cl::opt<unsigned> +SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, + cl::desc("Loops with a constant trip count smaller than " + "this value will not use the count register.")); + //===----------------------------------------------------------------------===// // // PPC cost model. @@ -205,6 +213,341 @@ unsigned PPCTTIImpl::getUserCost(const User *U, return BaseT::getUserCost(U, Operands); } +bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, + TargetLibraryInfo *LibInfo) { + const PPCTargetMachine &TM = ST->getTargetMachine(); + + // Loop through the inline asm constraints and look for something that + // clobbers ctr. + auto asmClobbersCTR = [](InlineAsm *IA) { + InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); + for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { + InlineAsm::ConstraintInfo &C = CIV[i]; + if (C.Type != InlineAsm::isInput) + for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) + if (StringRef(C.Codes[j]).equals_lower("{ctr}")) + return true; + } + return false; + }; + + // Determining the address of a TLS variable results in a function call in + // certain TLS models. + std::function<bool(const Value*)> memAddrUsesCTR = + [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool { + const auto *GV = dyn_cast<GlobalValue>(MemAddr); + if (!GV) { + // Recurse to check for constants that refer to TLS global variables. + if (const auto *CV = dyn_cast<Constant>(MemAddr)) + for (const auto &CO : CV->operands()) + if (memAddrUsesCTR(CO)) + return true; + + return false; + } + + if (!GV->isThreadLocal()) + return false; + TLSModel::Model Model = TM.getTLSModel(GV); + return Model == TLSModel::GeneralDynamic || + Model == TLSModel::LocalDynamic; + }; + + auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) { + if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) + return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); + + return false; + }; + + for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); + J != JE; ++J) { + if (CallInst *CI = dyn_cast<CallInst>(J)) { + // Inline ASM is okay, unless it clobbers the ctr register. + if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { + if (asmClobbersCTR(IA)) + return true; + continue; + } + + if (Function *F = CI->getCalledFunction()) { + // Most intrinsics don't become function calls, but some might. + // sin, cos, exp and log are always calls. + unsigned Opcode = 0; + if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { + switch (F->getIntrinsicID()) { + default: continue; + // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr + // we're definitely using CTR. + case Intrinsic::set_loop_iterations: + case Intrinsic::loop_decrement: + return true; + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + + case Intrinsic::setjmp: + +#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) + // let's return it to _setjmp state +# pragma pop_macro("setjmp") +# undef setjmp_undefined_for_msvc +#endif + + case Intrinsic::longjmp: + + // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp + // because, although it does clobber the counter register, the + // control can't then return to inside the loop unless there is also + // an eh_sjlj_setjmp. 
+ case Intrinsic::eh_sjlj_setjmp: + + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::powi: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + return true; + case Intrinsic::copysign: + if (CI->getArgOperand(0)->getType()->getScalarType()-> + isPPC_FP128Ty()) + return true; + else + continue; // ISD::FCOPYSIGN is never a library call. + case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; + case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; + case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; + } + } + + // PowerPC does not use [US]DIVREM or other library calls for + // operations on regular types which are not otherwise library calls + // (i.e. soft float or atomics). If adapting for targets that do, + // additional care is required here. + + LibFunc Func; + if (!F->hasLocalLinkage() && F->hasName() && LibInfo && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + // Non-read-only functions are never treated as intrinsics. + if (!CI->onlyReadsMemory()) + return true; + + // Conversion happens only for FP calls. + if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) + return true; + + switch (Func) { + default: return true; + case LibFunc_copysign: + case LibFunc_copysignf: + continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc_copysignl: + return true; + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: + continue; // ISD::FABS is never a library call. + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + Opcode = ISD::FSQRT; break; + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: + Opcode = ISD::FFLOOR; break; + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: + Opcode = ISD::FNEARBYINT; break; + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: + Opcode = ISD::FCEIL; break; + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: + Opcode = ISD::FRINT; break; + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: + Opcode = ISD::FROUND; break; + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: + Opcode = ISD::FTRUNC; break; + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: + Opcode = ISD::FMINNUM; break; + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: + Opcode = ISD::FMAXNUM; break; + } + } + + if (Opcode) { + EVT EVTy = + TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true); + + if (EVTy == MVT::Other) + return true; + + if (TLI->isOperationLegalOrCustom(Opcode, EVTy)) + continue; + else if (EVTy.isVector() && + TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType())) + continue; + + return true; + } + } + + return true; + } else if (isa<BinaryOperator>(J) && + J->getType()->getScalarType()->isPPC_FP128Ty()) { + // Most operations on ppc_f128 values become calls. 
+ return true; + } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || + isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { + CastInst *CI = cast<CastInst>(J); + if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || + CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || + isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) + return true; + } else if (isLargeIntegerTy(!TM.isPPC64(), + J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::UDiv || + J->getOpcode() == Instruction::SDiv || + J->getOpcode() == Instruction::URem || + J->getOpcode() == Instruction::SRem)) { + return true; + } else if (!TM.isPPC64() && + isLargeIntegerTy(false, J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::Shl || + J->getOpcode() == Instruction::AShr || + J->getOpcode() == Instruction::LShr)) { + // Only on PPC32, for 128-bit integers (specifically not 64-bit + // integers), these might be runtime calls. + return true; + } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { + // On PowerPC, indirect jumps use the counter register. + return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { + if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) + return true; + } + + // FREM is always a call. + if (J->getOpcode() == Instruction::FRem) + return true; + + if (ST->useSoftFloat()) { + switch(J->getOpcode()) { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FCmp: + return true; + } + } + + for (Value *Operand : J->operands()) + if (memAddrUsesCTR(Operand)) + return true; + } + + return false; +} + +bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, + AssumptionCache &AC, + TargetLibraryInfo *LibInfo, + HardwareLoopInfo &HWLoopInfo) { + const PPCTargetMachine &TM = ST->getTargetMachine(); + TargetSchedModel SchedModel; + SchedModel.init(ST); + + // Do not convert small short loops to CTR loop. + unsigned ConstTripCount = SE.getSmallConstantTripCount(L); + if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { + SmallPtrSet<const Value *, 32> EphValues; + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + CodeMetrics Metrics; + for (BasicBlock *BB : L->blocks()) + Metrics.analyzeBasicBlock(BB, *this, EphValues); + // 6 is an approximate latency for the mtctr instruction. + if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) + return false; + } + + // We don't want to spill/restore the counter register, and so we don't + // want to use the counter register if the loop contains calls. + for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); + I != IE; ++I) + if (mightUseCTR(*I, LibInfo)) + return false; + + SmallVector<BasicBlock*, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // If there is an exit edge known to be frequently taken, + // we should not transform this loop. 
+ for (auto &BB : ExitingBlocks) { + Instruction *TI = BB->getTerminator(); + if (!TI) continue; + + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + uint64_t TrueWeight = 0, FalseWeight = 0; + if (!BI->isConditional() || + !BI->extractProfMetadata(TrueWeight, FalseWeight)) + continue; + + // If the exit path is more frequent than the loop path, + // we return here without further analysis for this loop. + bool TrueIsExit = !L->contains(BI->getSuccessor(0)); + if (( TrueIsExit && FalseWeight < TrueWeight) || + (!TrueIsExit && FalseWeight > TrueWeight)) + return false; + } + } + + LLVMContext &C = L->getHeader()->getContext(); + HWLoopInfo.CountType = TM.isPPC64() ? + Type::getInt64Ty(C) : Type::getInt32Ty(C); + HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1); + return true; +} + void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { if (ST->getDarwinDirective() == PPC::DIR_A2) { @@ -239,17 +582,12 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { return LoopHasReductions; } -const PPCTTIImpl::TTI::MemCmpExpansionOptions * -PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const { - static const auto Options = []() { - TTI::MemCmpExpansionOptions Options; - Options.LoadSizes.push_back(8); - Options.LoadSizes.push_back(4); - Options.LoadSizes.push_back(2); - Options.LoadSizes.push_back(1); - return Options; - }(); - return &Options; +PPCTTIImpl::TTI::MemCmpExpansionOptions +PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + Options.LoadSizes = {8, 4, 2, 1}; + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + return Options; } bool PPCTTIImpl::enableInterleavedAccessVectorization() { @@ -324,6 +662,33 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { return 2; } +// Adjust the cost of vector instructions on targets which there is overlap +// between the vector and scalar units, thereby reducing the overall throughput +// of vector code wrt. scalar code. +int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, + Type *Ty2) { + if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy()) + return Cost; + + std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1); + // If type legalization involves splitting the vector, we don't want to + // double the cost at every step - only the last step. + if (LT1.first != 1 || !LT1.second.isVector()) + return Cost; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (TLI->isOperationExpand(ISD, LT1.second)) + return Cost; + + if (Ty2) { + std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2); + if (LT2.first != 1 || !LT2.second.isVector()) + return Cost; + } + + return Cost * 2; +} + int PPCTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -331,8 +696,9 @@ int PPCTTIImpl::getArithmeticInstrCost( assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. 
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); + return vectorCostAdjustment(Cost, Opcode, Ty, nullptr); } int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, @@ -345,19 +711,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // instruction). We need one such shuffle instruction for each actual // register (this is not true for arbitrary shuffles, but is true for the // structured types of shuffles covered by TTI::ShuffleKind). - return LT.first; + return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp, + nullptr); } int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); - return BaseT::getCastInstrCost(Opcode, Dst, Src); + int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src); + return vectorCostAdjustment(Cost, Opcode, Dst, Src); } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I) { - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr); } int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { @@ -366,18 +735,23 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index); + Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr); + if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { - // Double-precision scalars are already located in index #0. - if (Index == 0) + // Double-precision scalars are already located in index #0 (or #1 if LE). + if (ISD == ISD::EXTRACT_VECTOR_ELT && + Index == (ST->isLittleEndian() ? 1 : 0)) return 0; - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return Cost; + } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) { // Floating point scalars are already located in index #0. if (Index == 0) return 0; - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return Cost; } // Estimated cost of a load-hit-store delay. This was obtained @@ -394,9 +768,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // these need to be estimated as very costly. if (ISD == ISD::EXTRACT_VECTOR_ELT || ISD == ISD::INSERT_VECTOR_ELT) - return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index); + return LHSPenalty + Cost; - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return Cost; } int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, @@ -407,6 +781,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, "Invalid Opcode"); int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr); bool IsAltivecType = ST->hasAltivec() && (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 || @@ -500,3 +875,25 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, return Cost; } +bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT, + AssumptionCache *AC, TargetLibraryInfo *LibInfo) { + // Process nested loops first. 
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo)) + return false; // Stop search. + + HardwareLoopInfo HWLoopInfo(L); + + if (!HWLoopInfo.canAnalyze(*LI)) + return false; + + if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) + return false; + + if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT)) + return false; + + *BI = HWLoopInfo.ExitBranch; + return true; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 9221a910288a..5d76ee418b69 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -1,9 +1,8 @@ //===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -17,7 +16,6 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H #define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H -#include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" @@ -35,6 +33,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> { const PPCSubtarget *getST() const { return ST; } const PPCTargetLowering *getTLI() const { return TLI; } + bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo); public: explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F) @@ -54,6 +53,13 @@ public: unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, + AssumptionCache &AC, + TargetLibraryInfo *LibInfo, + HardwareLoopInfo &HWLoopInfo); + bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); @@ -63,14 +69,15 @@ public: /// @{ bool useColdCCForColdCall(Function &F); bool enableAggressiveInterleaving(bool LoopHasReductions); - const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( - bool IsZeroCmp) const; + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, + bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector) const; unsigned getCacheLineSize(); unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); + int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp index 93fe3230ab81..719ed7b63878 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -1,9 +1,8 @@ //===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===// // -// The LLVM Compiler 
Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -37,10 +36,6 @@ using namespace llvm; #define DEBUG_TYPE "ppc-vsx-copy" -namespace llvm { - void initializePPCVSXCopyPass(PassRegistry&); -} - namespace { // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers // (Altivec and scalar floating-point registers), we need to transform the diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 6586f503a7b8..ce78239df0a8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -1,9 +1,8 @@ //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 1be193e08c01..44175af7f9b6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -1,9 +1,8 @@ //===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// // @@ -60,10 +59,6 @@ using namespace llvm; #define DEBUG_TYPE "ppc-vsx-swaps" -namespace llvm { - void initializePPCVSXSwapRemovalPass(PassRegistry&); -} - namespace { // A PPCVSXSwapEntry is created for each machine instruction that @@ -427,6 +422,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // of opcodes having a common attribute in TableGen. Should this // change, this is a prime candidate to use such a mechanism. case PPC::INLINEASM: + case PPC::INLINEASM_BR: case PPC::EXTRACT_SUBREG: case PPC::INSERT_SUBREG: case PPC::COPY_TO_REGCLASS: diff --git a/contrib/llvm/lib/Target/PowerPC/README_P9.txt b/contrib/llvm/lib/Target/PowerPC/README_P9.txt index d56f7cca7b21..c9984b7604bd 100644 --- a/contrib/llvm/lib/Target/PowerPC/README_P9.txt +++ b/contrib/llvm/lib/Target/PowerPC/README_P9.txt @@ -512,8 +512,8 @@ Fixed Point Facility: "lxsdx $XT, $src", IIC_LdStLFD, [(set f64:$XT, (load xoaddr:$src))]>; - . (set f64:$XT, (load ixaddr:$src)) - (set f64:$XT, (store ixaddr:$dst)) + . (set f64:$XT, (load iaddrX4:$src)) + (set f64:$XT, (store iaddrX4:$dst)) - Load/Store SP, with conversion from/to DP: lxssp stxssp . 
Similar to lxsspx/stxsspx:
@@ -521,8 +521,8 @@ Fixed Point Facility:
                       "lxsspx $XT, $src", IIC_LdStLFD,
                       [(set f32:$XT, (load xoaddr:$src))]>;
 
-  . (set f32:$XT, (load ixaddr:$src))
-    (set f32:$XT, (store ixaddr:$dst))
+  . (set f32:$XT, (load iaddrX4:$src))
+    (set f32:$XT, (store iaddrX4:$dst))
 
 - Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
   . Similar to lxsiwzx:
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 979595264472..99b5dec74668 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
new file mode 100644
index 000000000000..2d0afbfb1be0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- PowerPCTargetInfo.h - PowerPC Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+#define LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getThePPC32Target();
+Target &getThePPC64Target();
+Target &getThePPC64LETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
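With the TargetInfo interface split into its own header, the three Target accessors can be used without pulling in PPC.h. A minimal consumer sketch, assuming LLVMInitializePowerPCTargetInfo() has already run so the singletons are populated:

```cpp
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/Support/TargetRegistry.h" // full llvm::Target definition
#include "llvm/Support/raw_ostream.h"

// Print the registered names of the three PowerPC targets.
void listPPCTargets() {
  for (llvm::Target *T : {&llvm::getThePPC32Target(),
                          &llvm::getThePPC64Target(),
                          &llvm::getThePPC64LETarget()})
    llvm::outs() << T->getName() << "\n";
}
```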