diff options
Diffstat (limited to 'contrib/llvm/lib/Target/SystemZ')
75 files changed, 4325 insertions, 1123 deletions
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 91959b4151b3..a259ba3433d6 100644 --- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -1,14 +1,14 @@ //===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "InstPrinter/SystemZInstPrinter.h" +#include "MCTargetDesc/SystemZInstPrinter.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "TargetInfo/SystemZTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -651,7 +651,6 @@ static void printMCExpr(const MCExpr *E, raw_ostream &OS) { void SystemZOperand::print(raw_ostream &OS) const { switch (Kind) { - break; case KindToken: OS << "Token:" << getToken(); break; @@ -1181,8 +1180,10 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, // features to be available during the operand check, or else we will fail to // find the custom parser, and then we will later get an InvalidOperand error // instead of a MissingFeature errror. - uint64_t AvailableFeatures = getAvailableFeatures(); - setAvailableFeatures(~(uint64_t)0); + FeatureBitset AvailableFeatures = getAvailableFeatures(); + FeatureBitset All; + All.set(); + setAvailableFeatures(All); OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); setAvailableFeatures(AvailableFeatures); if (ResTy == MatchOperand_Success) @@ -1233,7 +1234,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, return false; } -static std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS, +static std::string SystemZMnemonicSpellCheck(StringRef S, + const FeatureBitset &FBS, unsigned VariantID = 0); bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -1244,8 +1246,9 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst Inst; unsigned MatchResult; + FeatureBitset MissingFeatures; MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, - MatchingInlineAsm); + MissingFeatures, MatchingInlineAsm); switch (MatchResult) { case Match_Success: Inst.setLoc(IDLoc); @@ -1253,17 +1256,15 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return false; case Match_MissingFeature: { - assert(ErrorInfo && "Unknown missing feature!"); + assert(MissingFeatures.any() && "Unknown missing feature!"); // Special case the error message for the very common case where only // a single subtarget feature is missing std::string Msg = "instruction requires:"; - uint64_t Mask = 1; - for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) { - if (ErrorInfo & Mask) { + for (unsigned I = 0, E = MissingFeatures.size(); I != E; ++I) { + if (MissingFeatures[I]) { Msg += " "; - Msg += getSubtargetFeatureName(ErrorInfo & Mask); + Msg += getSubtargetFeatureName(I); } - Mask <<= 1; } return Error(IDLoc, Msg); } @@ -1282,7 +1283,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_MnemonicFail: { - uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); std::string Suggestion = SystemZMnemonicSpellCheck( ((SystemZOperand &)*Operands[0]).getToken(), FBS); return Error(IDLoc, "invalid instruction" + Suggestion, diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 8903b57ffd0b..70c26db33ced 100644 --- a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -1,14 +1,14 @@ //===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" +#include "TargetInfo/SystemZTargetInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp index 6cd12e13e220..91cb35dd72f2 100644 --- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp @@ -1,9 +1,8 @@ //===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h index d65c661545eb..4235d4e21792 100644 --- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h @@ -1,9 +1,8 @@ //==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -11,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H -#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H +#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H +#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" #include <cstdint> @@ -75,4 +74,4 @@ private: } // end namespace llvm -#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H +#endif // LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 2146832f7794..23d8585095cc 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -1,9 +1,8 @@ //===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 6e00981939b6..d6cdacfcab92 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -1,9 +1,8 @@ //===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h index 800f89232063..b8818a65f9e3 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -1,9 +1,8 @@ //====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index d188f56512ab..a5ccf4f68ffd 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -1,9 +1,8 @@ //===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -144,9 +143,10 @@ private: } private: - uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; - void verifyInstructionPredicates(const MCInst &MI, - uint64_t AvailableFeatures) const; + FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; + void + verifyInstructionPredicates(const MCInst &MI, + const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h index c012accc14dd..14f6198183b9 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h @@ -1,9 +1,8 @@ //===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 888be519fb16..8d8ba5644e10 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -1,9 +1,8 @@ //===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -37,8 +36,8 @@ protected: } // end anonymous namespace SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390, - /*HasRelocationAddend=*/ true) {} + : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390, + /*HasRelocationAddend_=*/ true) {} // Return the relocation type for an absolute value of MCFixupKind Kind. static unsigned getAbsoluteReloc(unsigned Kind) { diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 05688ed8efbb..3c0300cfd8f0 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -1,15 +1,16 @@ //===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SystemZMCTargetDesc.h" -#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZInstPrinter.h" #include "SystemZMCAsmInfo.h" +#include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 1617a807e65a..8f720c5abb34 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -1,9 +1,8 @@ //===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -30,8 +29,6 @@ class Triple; class raw_pwrite_stream; class raw_ostream; -Target &getTheSystemZTarget(); - namespace SystemZMC { // How many bytes are in the ABI-defined, caller-allocated part of // a stack frame. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h index fdbde3d8dbc3..2b0f90182d7f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZ.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h @@ -1,9 +1,8 @@ //==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -195,6 +194,7 @@ FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td index 3800f7a26b79..ebbc6ffd2f1e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZ.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td @@ -1,9 +1,8 @@ //===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index e2de721be568..ef378e4ade7a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -1,9 +1,8 @@ //===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -13,9 +12,10 @@ //===----------------------------------------------------------------------===// #include "SystemZAsmPrinter.h" -#include "InstPrinter/SystemZInstPrinter.h" +#include "MCTargetDesc/SystemZInstPrinter.h" #include "SystemZConstantPoolValue.h" #include "SystemZMCInstLower.h" +#include "TargetInfo/SystemZTargetInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Mangler.h" @@ -80,6 +80,27 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { Context); } +// MI is an instruction that accepts an optional alignment hint, +// and which was already lowered to LoweredMI. If the alignment +// of the original memory operand is known, update LoweredMI to +// an instruction with the corresponding hint set. +static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI, + unsigned Opcode) { + if (!MI->hasOneMemOperand()) + return; + const MachineMemOperand *MMO = *MI->memoperands_begin(); + unsigned AlignmentHint = 0; + if (MMO->getAlignment() >= 16) + AlignmentHint = 4; + else if (MMO->getAlignment() >= 8) + AlignmentHint = 3; + if (AlignmentHint == 0) + return; + + LoweredMI.setOpcode(Opcode); + LoweredMI.addOperand(MCOperand::createImm(AlignmentHint)); +} + // MI loads the high part of a vector from memory. Return an instruction // that uses replicating vector load Opcode to do the same thing. static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) { @@ -351,6 +372,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())); break; + case SystemZ::VL: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VLAlign); + break; + + case SystemZ::VST: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTAlign); + break; + + case SystemZ::VLM: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VLMAlign); + break; + + case SystemZ::VSTM: + Lower.lower(MI, LoweredMI); + lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTMAlign); + break; + case SystemZ::VL32: LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); break; @@ -618,26 +659,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OutStreamer->EmitValue(Expr, Size); } -bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, +bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) { - if (ExtraCode && *ExtraCode == 'n') { - if (!MI->getOperand(OpNo).isImm()) - return true; - OS << -int64_t(MI->getOperand(OpNo).getImm()); - } else { - SystemZMCInstLower Lower(MF->getContext(), *this); - MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); - SystemZInstPrinter::printOperand(MO, MAI, OS); - } + if (ExtraCode) + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS); + SystemZMCInstLower Lower(MF->getContext(), *this); + MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); + SystemZInstPrinter::printOperand(MO, MAI, OS); return false; } bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) { SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(), diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h index cb88ec32f83a..aa5d3ca78e61 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -1,9 +1,8 @@ //===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -37,11 +36,9 @@ public: void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; void EmitEndOfAsmFile(Module &M) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS) override; + const char *ExtraCode, raw_ostream &OS) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS) override; + const char *ExtraCode, raw_ostream &OS) override; bool doInitialization(Module &M) override { SM.reset(); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp index 72da51f74b10..91c7fae17a75 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -1,9 +1,8 @@ //===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h index b5523e586f4c..82f29b6361f1 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -1,9 +1,8 @@ //===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td index deba27fee7fe..bbd51546ac9f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -1,9 +1,8 @@ //=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This describes the calling conventions for the SystemZ ABI. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index 4a6beb67f182..ffeee4da95cc 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -1,9 +1,8 @@ //===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h index a71b595560d2..6cb7710abdfe 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -1,9 +1,8 @@ //===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index 668a77ac014f..9cbf6b320504 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -1,9 +1,8 @@ //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -147,6 +146,9 @@ static bool resultTests(MachineInstr &MI, unsigned Reg) { // Describe the references to Reg or any of its aliases in MI. Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { Reference Ref; + if (MI.isDebugInstr()) + return Ref; + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { @@ -523,9 +525,9 @@ bool SystemZElimCompare::fuseCompareOperations( // SrcReg2 is the register if the source operand is a register, // 0 if the source operand is immediate, and the base register // if the source operand is memory (index is not supported). - unsigned SrcReg = Compare.getOperand(0).getReg(); - unsigned SrcReg2 = - Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0; + Register SrcReg = Compare.getOperand(0).getReg(); + Register SrcReg2 = + Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : Register(); MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; for (++MBBI; MBBI != MBBE; ++MBBI) if (MBBI->modifiesRegister(SrcReg, TRI) || diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp index 67c80899d491..09708fb4241c 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -1,9 +1,8 @@ //==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td index beff45dba81d..dae795e845b0 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -1,9 +1,8 @@ //===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -242,6 +241,51 @@ def Arch12NewFeatures : SystemZFeatureList<[ //===----------------------------------------------------------------------===// // +// New features added in the Thirteenth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureMiscellaneousExtensions3 : SystemZFeature< + "miscellaneous-extensions-3", "MiscellaneousExtensions3", + "Assume that the miscellaneous-extensions facility 3 is installed" +>; + +def FeatureMessageSecurityAssist9 : SystemZFeature< + "message-security-assist-extension9", "MessageSecurityAssist9", + "Assume that the message-security-assist extension facility 9 is installed" +>; + +def FeatureVectorEnhancements2 : SystemZFeature< + "vector-enhancements-2", "VectorEnhancements2", + "Assume that the vector enhancements facility 2 is installed" +>; + +def FeatureVectorPackedDecimalEnhancement : SystemZFeature< + "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", + "Assume that the vector packed decimal enhancement facility is installed" +>; + +def FeatureEnhancedSort : SystemZFeature< + "enhanced-sort", "EnhancedSort", + "Assume that the enhanced-sort facility is installed" +>; + +def FeatureDeflateConversion : SystemZFeature< + "deflate-conversion", "DeflateConversion", + "Assume that the deflate-conversion facility is installed" +>; + +def Arch13NewFeatures : SystemZFeatureList<[ + FeatureMiscellaneousExtensions3, + FeatureMessageSecurityAssist9, + FeatureVectorEnhancements2, + FeatureVectorPackedDecimalEnhancement, + FeatureEnhancedSort, + FeatureDeflateConversion +]>; + +//===----------------------------------------------------------------------===// +// // Cumulative supported and unsupported feature sets // //===----------------------------------------------------------------------===// @@ -256,9 +300,13 @@ def Arch11SupportedFeatures : SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>; def Arch12SupportedFeatures : SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>; +def Arch13SupportedFeatures + : SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>; -def Arch12UnsupportedFeatures +def Arch13UnsupportedFeatures : SystemZFeatureList<[]>; +def Arch12UnsupportedFeatures + : SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>; def Arch11UnsupportedFeatures : SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>; def Arch10UnsupportedFeatures diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 565299c90139..da28faebb326 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -1,9 +1,8 @@ //===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 08c84c785cc0..71ef3e4dc240 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -1,9 +1,8 @@ //===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 8726b56bc94f..e2af02227999 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -1,9 +1,8 @@ //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h index 6292feefbfea..38bf41ebe96a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -1,9 +1,8 @@ //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 5bc2ab0ef2d8..9dc4512255cc 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1,9 +1,8 @@ //===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -12,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZISelLowering.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" @@ -304,6 +304,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + void loadVectorConstant(const SystemZVectorConstantInfo &VCI, + SDNode *Node); + // Try to use gather instruction Opcode to implement vector insertion N. bool tryGather(SDNode *N, unsigned Opcode); @@ -1132,6 +1135,35 @@ void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, SelectCode(Or.getNode()); } +void SystemZDAGToDAGISel::loadVectorConstant( + const SystemZVectorConstantInfo &VCI, SDNode *Node) { + assert((VCI.Opcode == SystemZISD::BYTE_MASK || + VCI.Opcode == SystemZISD::REPLICATE || + VCI.Opcode == SystemZISD::ROTATE_MASK) && + "Bad opcode!"); + assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type"); + EVT VT = Node->getValueType(0); + SDLoc DL(Node); + SmallVector<SDValue, 2> Ops; + for (unsigned OpVal : VCI.OpVals) + Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32)); + SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); + + if (VCI.VecVT == VT.getSimpleVT()) + ReplaceNode(Node, Op.getNode()); + else if (VT.getSizeInBits() == 128) { + SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); + ReplaceNode(Node, BitCast.getNode()); + SelectCode(BitCast.getNode()); + } else { // float or double + unsigned SubRegIdx = + (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64); + ReplaceNode( + Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode()); + } + SelectCode(Op.getNode()); +} + bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { SDValue ElemV = N->getOperand(2); auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); @@ -1243,6 +1275,9 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, InputChain = LoadNode->getChain(); } else if (Chain.getOpcode() == ISD::TokenFactor) { SmallVector<SDValue, 4> ChainOps; + SmallVector<const SDNode *, 4> LoopWorklist; + SmallPtrSet<const SDNode *, 16> Visited; + const unsigned int Max = 1024; for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { SDValue Op = Chain.getOperand(i); if (Op == Load.getValue(1)) { @@ -1251,28 +1286,26 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, ChainOps.push_back(Load.getOperand(0)); continue; } - - // Make sure using Op as part of the chain would not cause a cycle here. - // In theory, we could check whether the chain node is a predecessor of - // the load. But that can be very expensive. Instead visit the uses and - // make sure they all have smaller node id than the load. - int LoadId = LoadNode->getNodeId(); - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = UI->use_end(); UI != UE; ++UI) { - if (UI.getUse().getResNo() != 0) - continue; - if (UI->getNodeId() > LoadId) - return false; - } - + LoopWorklist.push_back(Op.getNode()); ChainOps.push_back(Op); } - if (ChainCheck) + if (ChainCheck) { + // Add the other operand of StoredVal to worklist. + for (SDValue Op : StoredVal->ops()) + if (Op.getNode() != LoadNode) + LoopWorklist.push_back(Op.getNode()); + + // Check if Load is reachable from any of the nodes in the worklist. + if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, + true)) + return false; + // Make a new TokenFactor with all the other input chains except // for the load. InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); + } } if (!ChainCheck) return false; @@ -1447,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { Node->getOperand(0).getOpcode() != ISD::Constant) if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); + // Don't split the operation if we can match one of the combined + // logical operations provided by miscellaneous-extensions-3. + if (Subtarget->hasMiscellaneousExtensions3()) { + unsigned ChildOpcode = Node->getOperand(0).getOpcode(); + // Check whether this expression matches NAND/NOR/NXOR. + if (Val == (uint64_t)-1 && Opcode == ISD::XOR) + if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR || + ChildOpcode == ISD::XOR) + break; + // Check whether this expression matches OR-with-complement. + if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) { + auto Op0 = Node->getOperand(0); + if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1))) + if (Op0Op1->getZExtValue() == (uint64_t)-1) + break; + } + } if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) { splitLargeImmediate(Opcode, Node, Node->getOperand(0), Val - uint32_t(Val), uint32_t(Val)); @@ -1527,6 +1577,27 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { break; } + case ISD::BUILD_VECTOR: { + auto *BVN = cast<BuildVectorSDNode>(Node); + SystemZVectorConstantInfo VCI(BVN); + if (VCI.isVectorConstantLegal(*Subtarget)) { + loadVectorConstant(VCI, Node); + return; + } + break; + } + + case ISD::ConstantFP: { + APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF(); + if (Imm.isZero() || Imm.isNegZero()) + break; + SystemZVectorConstantInfo VCI(Imm); + bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success; + assert(Success && "Expected legal FP immediate"); + loadVectorConstant(VCI, Node); + return; + } + case ISD::STORE: { if (tryFoldLoadStoreIntoMemOperand(Node)) return; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 607e55bf71c8..78820f511ab4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1,9 +1,8 @@ //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -250,8 +249,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // We have native support for a 64-bit CTLZ, via FLOGR. setOperationAction(ISD::CTLZ, MVT::i32, Promote); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); + // On arch13 we have native support for a 64-bit CTPOP. + if (Subtarget.hasMiscellaneousExtensions3()) { + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + } + // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); @@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); } + if (Subtarget.hasVectorEnhancements2()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); + } + // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -401,6 +418,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, VT, Legal); + setOperationAction(ISD::STRICT_FSUB, VT, Legal); + setOperationAction(ISD::STRICT_FMUL, VT, Legal); + setOperationAction(ISD::STRICT_FDIV, VT, Legal); + setOperationAction(ISD::STRICT_FMA, VT, Legal); + setOperationAction(ISD::STRICT_FSQRT, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); + setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); + setOperationAction(ISD::STRICT_FCEIL, VT, Legal); + setOperationAction(ISD::STRICT_FROUND, VT, Legal); + setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + } } } @@ -432,6 +467,20 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); } // The vector enhancements facility 1 has instructions for these. @@ -475,6 +524,25 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); setOperationAction(ISD::FMINNUM, MVT::f128, Legal); setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); + for (auto VT : { MVT::f32, MVT::f64, MVT::f128, + MVT::v4f32, MVT::v2f64 }) { + setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); + setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); + } } // We have fused multiply-addition for f32 and f64 but not f128. @@ -525,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); @@ -577,9 +646,127 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } -bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { +// Return true if the constant can be generated with a vector instruction, +// such as VGM, VGMB or VREPI. +bool SystemZVectorConstantInfo::isVectorConstantLegal( + const SystemZSubtarget &Subtarget) { + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + if (!Subtarget.hasVector() || + (isFP128 && !Subtarget.hasVectorEnhancements1())) + return false; + + // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- + // preferred way of creating all-zero and all-one vectors so give it + // priority over other methods below. + unsigned Mask = 0; + unsigned I = 0; + for (; I < SystemZ::VectorBytes; ++I) { + uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); + if (Byte == 0xff) + Mask |= 1ULL << I; + else if (Byte != 0) + break; + } + if (I == SystemZ::VectorBytes) { + Opcode = SystemZISD::BYTE_MASK; + OpVals.push_back(Mask); + VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); + return true; + } + + if (SplatBitSize > 64) + return false; + + auto tryValue = [&](uint64_t Value) -> bool { + // Try VECTOR REPLICATE IMMEDIATE + int64_t SignedValue = SignExtend64(Value, SplatBitSize); + if (isInt<16>(SignedValue)) { + OpVals.push_back(((unsigned) SignedValue)); + Opcode = SystemZISD::REPLICATE; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + // Try VECTOR GENERATE MASK + unsigned Start, End; + if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { + // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 + // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for + // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). + OpVals.push_back(Start - (64 - SplatBitSize)); + OpVals.push_back(End - (64 - SplatBitSize)); + Opcode = SystemZISD::ROTATE_MASK; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + return false; + }; + + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. + uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + uint64_t Lower = + (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); + uint64_t Upper = + (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); + if (tryValue(SplatBitsZ | Upper | Lower)) + return true; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. + uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + return tryValue(SplatBitsZ | Middle); +} + +SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) { + IntBits = FPImm.bitcastToAPInt().zextOrSelf(128); + isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad()); + + // Find the smallest splat. + SplatBits = FPImm.bitcastToAPInt(); + unsigned Width = SplatBits.getBitWidth(); + while (Width > 8) { + unsigned HalfSize = Width / 2; + APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatBits.trunc(HalfSize); + + // If the two halves do not match, stop here. + if (HighValue != LowValue || 8 > HalfSize) + break; + + SplatBits = HighValue; + Width = HalfSize; + } + SplatUndef = 0; + SplatBitSize = Width; +} + +SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { + assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); + bool HasAnyUndefs; + + // Get IntBits by finding the 128 bit splat. + BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, + true); + + // Get SplatBits by finding the 8 bit or greater splat. + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, + true); +} + +bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. - return Imm.isZero() || Imm.isNegZero(); + if (Imm.isZero() || Imm.isNegZero()) + return true; + + return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { @@ -592,10 +779,8 @@ bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isUInt<32>(Imm) || isUInt<32>(-Imm); } -bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, - unsigned, - bool *Fast) const { +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. @@ -1642,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { CCValid = SystemZ::CCMASK_ANY; return true; + case Intrinsic::s390_vstrsb: + case Intrinsic::s390_vstrsh: + case Intrinsic::s390_vstrsf: + Opcode = SystemZISD::VSTRS_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrszb: + case Intrinsic::s390_vstrszh: + case Intrinsic::s390_vstrszf: + Opcode = SystemZISD::VSTRSZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + case Intrinsic::s390_vfcedbs: case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; @@ -2511,9 +2710,8 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, break; } if (Invert) { - SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(65535, DL, MVT::i32)); - Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); + SDValue Mask = + DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } return Cmp; @@ -3261,6 +3459,18 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); } +static bool isAddCarryChain(SDValue Carry) { + while (Carry.getOpcode() == ISD::ADDCARRY) + Carry = Carry.getOperand(2); + return Carry.getOpcode() == ISD::UADDO; +} + +static bool isSubBorrowChain(SDValue Carry) { + while (Carry.getOpcode() == ISD::SUBCARRY) + Carry = Carry.getOperand(2); + return Carry.getOpcode() == ISD::USUBO; +} + // Lower ADDCARRY/SUBCARRY nodes. SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) const { @@ -3283,11 +3493,17 @@ SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op, switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::ADDCARRY: + if (!isAddCarryChain(Carry)) + return SDValue(); + BaseOp = SystemZISD::ADDCARRY; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_CARRY; break; case ISD::SUBCARRY: + if (!isSubBorrowChain(Carry)) + return SDValue(); + BaseOp = SystemZISD::SUBCARRY; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_BORROW; @@ -3331,14 +3547,14 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, break; } case 32: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; } case 64: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; @@ -3602,6 +3818,27 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, return SDValue(); } +MachineMemOperand::Flags +SystemZTargetLowering::getMMOFlags(const Instruction &I) const { + // Because of how we convert atomic_load and atomic_store to normal loads and + // stores in the DAG, we need to ensure that the MMOs are marked volatile + // since DAGCombine hasn't been updated to account for atomic, but non + // volatile loads. (See D57601) + if (auto *SI = dyn_cast<StoreInst>(&I)) + if (SI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *LI = dyn_cast<LoadInst>(&I)) + if (LI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + return MachineMemOperand::MONone; +} + SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -4260,78 +4497,6 @@ static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } -// Try to represent constant BUILD_VECTOR node BVN using a -// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask -// on success. -static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { - EVT ElemVT = BVN->getValueType(0).getVectorElementType(); - unsigned BytesPerElement = ElemVT.getStoreSize(); - for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { - SDValue Op = BVN->getOperand(I); - if (!Op.isUndef()) { - uint64_t Value; - if (Op.getOpcode() == ISD::Constant) - Value = cast<ConstantSDNode>(Op)->getZExtValue(); - else if (Op.getOpcode() == ISD::ConstantFP) - Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() - .getZExtValue()); - else - return false; - for (unsigned J = 0; J < BytesPerElement; ++J) { - uint64_t Byte = (Value >> (J * 8)) & 0xff; - if (Byte == 0xff) - Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); - else if (Byte != 0) - return false; - } - } - } - return true; -} - -// Try to load a vector constant in which BitsPerElement-bit value Value -// is replicated to fill the vector. VT is the type of the resulting -// constant, which may have elements of a different size from BitsPerElement. -// Return the SDValue of the constant on success, otherwise return -// an empty value. -static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, - const SystemZInstrInfo *TII, - const SDLoc &DL, EVT VT, uint64_t Value, - unsigned BitsPerElement) { - // Signed 16-bit values can be replicated using VREPI. - // Mark the constants as opaque or DAGCombiner will convert back to - // BUILD_VECTOR. - int64_t SignedValue = SignExtend64(Value, BitsPerElement); - if (isInt<16>(SignedValue)) { - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::REPLICATE, DL, VecVT, - DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - // See whether rotating the constant left some N places gives a value that - // is one less than a power of 2 (i.e. all zeros followed by all ones). - // If so we can use VGM. - unsigned Start, End; - if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { - // isRxSBGMask returns the bit numbers for a full 64-bit value, - // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to - // bit numbers for an BitsPerElement value, so that 0 denotes - // 1 << (BitsPerElement-1). - Start -= 64 - BitsPerElement; - End -= 64 - BitsPerElement; - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::ROTATE_MASK, DL, VecVT, - DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/), - DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - return SDValue(); -} - // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR @@ -4385,9 +4550,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, return GS.getNode(DAG, SDLoc(BVN)); } +bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { + if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) + return true; + if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) + return true; + return false; +} + // Combine GPR scalar values Elems into a vector of type VT. -static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - SmallVectorImpl<SDValue> &Elems) { +SDValue +SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const { // See whether there is a single replicated value. SDValue Single; unsigned int NumElements = Elems.size(); @@ -4416,13 +4590,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // we would need 2 instructions to replicate it: VLVGP followed by VREPx. // This is only a win if the single defined element is used more than once. // In other cases we're better off using a single VLVGx. - if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) + if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); // If all elements are loads, use VLREP/VLEs (below). bool AllLoads = true; for (auto Elem : Elems) - if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) { + if (!isVectorElementLoad(Elem)) { AllLoads = false; break; } @@ -4494,8 +4668,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, std::map<const SDNode*, unsigned> UseCounts; SDNode *LoadMaxUses = nullptr; for (unsigned I = 0; I < NumElements; ++I) - if (Elems[I].getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Elems[I])->isUnindexed()) { + if (isVectorElementLoad(Elems[I])) { SDNode *Ld = Elems[I].getNode(); UseCounts[Ld]++; if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) @@ -4532,56 +4705,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); if (BVN->isConstant()) { - // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- - // preferred way of creating all-zero and all-one vectors so give it - // priority over other methods below. - uint64_t Mask = 0; - if (tryBuildVectorByteMask(BVN, Mask)) { - SDValue Op = DAG.getNode( - SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - - // Try using some form of replication. - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, - 8, true) && - SplatBitSize <= 64) { - // First try assuming that any undefined bits above the highest set bit - // and below the lowest set bit are 1s. This increases the likelihood of - // being able to use a sign-extended element value in VECTOR REPLICATE - // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. - uint64_t SplatBitsZ = SplatBits.getZExtValue(); - uint64_t SplatUndefZ = SplatUndef.getZExtValue(); - uint64_t Lower = (SplatUndefZ - & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); - uint64_t Upper = (SplatUndefZ - & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); - uint64_t Value = SplatBitsZ | Upper | Lower; - SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, - SplatBitSize); - if (Op.getNode()) - return Op; - - // Now try assuming that any undefined bits between the first and - // last defined set bits are set. This increases the chances of - // using a non-wraparound mask. - uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; - Value = SplatBitsZ | Middle; - Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); - if (Op.getNode()) - return Op; - } + if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) + return Op; // Fall back to loading it from memory. return SDValue(); @@ -5074,6 +5204,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VISTR_CC); OPCODE(VSTRC_CC); OPCODE(VSTRCZ_CC); + OPCODE(VSTRS_CC); + OPCODE(VSTRSZ_CC); OPCODE(TDC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); @@ -5093,6 +5225,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_CMP_SWAP_128); OPCODE(LRV); OPCODE(STRV); + OPCODE(VLER); + OPCODE(VSTER); OPCODE(PREFETCH); } return nullptr; @@ -5340,8 +5474,7 @@ SDValue SystemZTargetLowering::combineMERGE( SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); - if (Op0.getOpcode() == SystemZISD::BYTE_MASK && - cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { + if (ISD::isBuildVectorAllZeros(Op0.getNode())) { // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF // for v4f32. if (Op1 == N->getOperand(0)) @@ -5407,6 +5540,31 @@ SDValue SystemZTargetLowering::combineLOAD( return SDValue(N, 0); } +bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { + if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) + return true; + if (Subtarget.hasVectorEnhancements2()) + if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64) + return true; + return false; +} + +static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { + if (!VT.isVector() || !VT.isSimple() || + VT.getSizeInBits() != 128 || + VT.getScalarSizeInBits() % 8 != 0) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != NumElts - 1 - i) + return false; + } + + return true; +} + SDValue SystemZTargetLowering::combineSTORE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -5428,13 +5586,11 @@ SDValue SystemZTargetLowering::combineSTORE( SN->getMemOperand()); } } - // Combine STORE (BSWAP) into STRVH/STRV/STRVG + // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && - (Op1.getValueType() == MVT::i16 || - Op1.getValueType() == MVT::i32 || - Op1.getValueType() == MVT::i64)) { + canLoadStoreByteSwapped(Op1.getValueType())) { SDValue BSwapOp = Op1.getOperand(0); @@ -5449,15 +5605,97 @@ SDValue SystemZTargetLowering::combineSTORE( DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } + // Combine STORE (element-swap) into VSTER + if (!SN->isTruncatingStore() && + Op1.getOpcode() == ISD::VECTOR_SHUFFLE && + Op1.getNode()->hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { + SDValue Ops[] = { + N->getOperand(0), Op1.getOperand(0), N->getOperand(2) + }; + + return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), + DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + // Combine element-swap (LOAD) into VLER + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && + N->getOperand(0).hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + + // Create the element-swapping load. + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr() // Ptr + }; + SDValue ESLoad = + DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), + DAG.getVTList(LD->getValueType(0), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + + // First, combine the VECTOR_SHUFFLE away. This makes the value produced + // by the load dead. + DCI.CombineTo(N, ESLoad); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the shuffle is dead. + DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); + + // Return N so it doesn't get rechecked! + return SDValue(N, 0); + } + } + return SDValue(); } SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; if (!Subtarget.hasVector()) return SDValue(); + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Pull BSWAP out of a vector extraction. + if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { + EVT VecVT = Op.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, + Op.getOperand(0), N->getOperand(1)); + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); + if (EltVT != N->getValueType(0)) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); + } + return Op; + } + // Try to simplify a vector extraction. if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); @@ -5480,6 +5718,10 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS( SDValue SystemZTargetLowering::combineFP_ROUND( SDNode *N, DAGCombinerInfo &DCI) const { + + if (!Subtarget.hasVector()) + return SDValue(); + // (fpround (extract_vector_elt X 0)) // (fpround (extract_vector_elt X 1)) -> // (extract_vector_elt (VROUND X) 0) @@ -5527,6 +5769,10 @@ SDValue SystemZTargetLowering::combineFP_ROUND( SDValue SystemZTargetLowering::combineFP_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { + + if (!Subtarget.hasVector()) + return SDValue(); + // (fpextend (extract_vector_elt X 0)) // (fpextend (extract_vector_elt X 2)) -> // (extract_vector_elt (VEXTEND X) 0) @@ -5575,11 +5821,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - // Combine BSWAP (LOAD) into LRVH/LRV/LRVG + // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 || - N->getValueType(0) == MVT::i64)) { + canLoadStoreByteSwapped(N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); @@ -5612,6 +5857,74 @@ SDValue SystemZTargetLowering::combineBSWAP( // Return N so it doesn't get rechecked! return SDValue(N, 0); } + + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Push BSWAP into a vector insertion if at least one side then simplifies. + if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { + SDValue Vec = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || + Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Elt) || + Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || + (canLoadStoreByteSwapped(N->getValueType(0)) && + ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { + EVT VecVT = N->getValueType(0); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (VecVT != Vec.getValueType()) { + Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + } + if (EltVT != Elt.getValueType()) { + Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + } + Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, + Vec, Elt, Idx); + } + } + + // Push BSWAP into a vector shuffle if at least one side then simplifies. + ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op); + if (SV && Op.hasOneUse()) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || + Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Op1) || + Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { + EVT VecVT = N->getValueType(0); + if (VecVT != Op0.getValueType()) { + Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + } + if (VecVT != Op1.getValueType()) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); + } + } + return SDValue(); } @@ -5811,12 +6124,18 @@ SDValue SystemZTargetLowering::combineIntDIVREM( // since it is not Legal but Custom it can only happen before // legalization. Therefore we must scalarize this early before Combine // 1. For widened vectors, this is already the result of type legalization. - if (VT.isVector() && isTypeLegal(VT) && + if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) return DAG.UnrollVectorOp(N); return SDValue(); } +SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { + if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) + return N->getOperand(0); + return N; +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { @@ -5828,6 +6147,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::LOAD: return combineLOAD(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); + case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); @@ -6018,12 +6338,10 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: { SDValue SrcOp = Op.getOperand(1); - unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits(); APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); if (IsLogical) { - Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(SrcBitWidth); + Known = Known.zext(BitWidth, true); } else Known = Known.sext(BitWidth); break; @@ -6052,7 +6370,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // Known has the width of the source operand(s). Adjust if needed to match // the passed bitwidth. if (Known.getBitWidth() != BitWidth) - Known = Known.zextOrTrunc(BitWidth); + Known = Known.zextOrTrunc(BitWidth, false); } static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, @@ -6166,7 +6484,7 @@ static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, } // Force base value Base into a register before MI. Return the register. -static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, +static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII) { if (Base.isReg()) return Base.getReg(); @@ -6175,7 +6493,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) .add(Base) .addImm(0) @@ -6254,7 +6572,8 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, // destination registers, and the registers that went into the PHI. DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; - for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { + for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; + MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) { unsigned DestReg = MIIt->getOperand(0).getReg(); unsigned TrueReg = MIIt->getOperand(1).getReg(); unsigned FalseReg = MIIt->getOperand(2).getReg(); @@ -6278,6 +6597,8 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, // Add this PHI to the rewrite table. RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); } + + MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); } // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. @@ -6295,8 +6616,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // same condition code value, we want to expand all of them into // a single pair of basic blocks using the same condition. MachineInstr *LastMI = &MI; - MachineBasicBlock::iterator NextMIIt = - std::next(MachineBasicBlock::iterator(MI)); + MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward( + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); if (isSelectPseudo(MI)) while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) && @@ -6304,7 +6625,7 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, (NextMIIt->getOperand(4).getImm() == CCMask || NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) { LastMI = &*NextMIIt; - ++NextMIIt; + NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end()); } MachineBasicBlock *StartMBB = MBB; @@ -6337,8 +6658,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // ... MBB = JoinMBB; MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); - MachineBasicBlock::iterator MIItEnd = - std::next(MachineBasicBlock::iterator(LastMI)); + MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward( + std::next(MachineBasicBlock::iterator(LastMI)), MBB->end()); createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB); StartMBB->erase(MIItBegin, MIItEnd); @@ -6456,8 +6777,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); - unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0); - unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0); + Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register(); + Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register(); DebugLoc DL = MI.getDebugLoc(); if (IsSubWord) BitSize = MI.getOperand(6).getImm(); @@ -6475,12 +6796,12 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. - unsigned OrigVal = MRI.createVirtualRegister(RC); - unsigned OldVal = MRI.createVirtualRegister(RC); - unsigned NewVal = (BinOpcode || IsSubWord ? + Register OrigVal = MRI.createVirtualRegister(RC); + Register OldVal = MRI.createVirtualRegister(RC); + Register NewVal = (BinOpcode || IsSubWord ? MRI.createVirtualRegister(RC) : Src2.getReg()); - unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); - unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; @@ -6573,9 +6894,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( unsigned Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); - unsigned Src2 = MI.getOperand(3).getReg(); - unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0); - unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0); + Register Src2 = MI.getOperand(3).getReg(); + Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register()); + Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register()); DebugLoc DL = MI.getDebugLoc(); if (IsSubWord) BitSize = MI.getOperand(6).getImm(); @@ -6593,12 +6914,12 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. - unsigned OrigVal = MRI.createVirtualRegister(RC); - unsigned OldVal = MRI.createVirtualRegister(RC); - unsigned NewVal = MRI.createVirtualRegister(RC); - unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); - unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); - unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + Register OrigVal = MRI.createVirtualRegister(RC); + Register OldVal = MRI.createVirtualRegister(RC); + Register NewVal = MRI.createVirtualRegister(RC); + Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); + Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); // Insert 3 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; @@ -6881,22 +7202,22 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( if (MI.getNumExplicitOperands() > 5) { bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); - uint64_t StartCountReg = MI.getOperand(5).getReg(); - uint64_t StartSrcReg = forceReg(MI, SrcBase, TII); - uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg : + Register StartCountReg = MI.getOperand(5).getReg(); + Register StartSrcReg = forceReg(MI, SrcBase, TII); + Register StartDestReg = (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII)); const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; - uint64_t ThisSrcReg = MRI.createVirtualRegister(RC); - uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg : + Register ThisSrcReg = MRI.createVirtualRegister(RC); + Register ThisDestReg = (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC)); - uint64_t NextSrcReg = MRI.createVirtualRegister(RC); - uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg : + Register NextSrcReg = MRI.createVirtualRegister(RC); + Register NextDestReg = (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC)); RC = &SystemZ::GR64BitRegClass; - uint64_t ThisCountReg = MRI.createVirtualRegister(RC); - uint64_t NextCountReg = MRI.createVirtualRegister(RC); + Register ThisCountReg = MRI.createVirtualRegister(RC); + Register NextCountReg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 622da32e418d..23cdcc72bc42 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -1,9 +1,8 @@ //===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,6 +15,7 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H #include "SystemZ.h" +#include "SystemZInstrInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" @@ -281,6 +281,8 @@ enum NodeType : unsigned { VISTR_CC, VSTRC_CC, VSTRCZ_CC, + VSTRS_CC, + VSTRSZ_CC, // Test Data Class. // @@ -340,6 +342,9 @@ enum NodeType : unsigned { // Byte swapping load/store. Same operands as regular load/store. LRV, STRV, + // Element swapping load/store. Same operands as regular load/store. + VLER, VSTER, + // Prefetch from the second operand using the 4-bit control code in // the first operand. The code is 1 for a load prefetch and 2 for // a store prefetch. @@ -396,10 +401,12 @@ public: return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } + bool isCheapToSpeculateCtlz() const override { return true; } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -407,6 +414,7 @@ public: Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; bool isTruncateFree(Type *, Type *) const override; bool isTruncateFree(EVT, EVT) const override; @@ -568,6 +576,9 @@ private: SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + bool isVectorElementLoad(SDValue Op) const; + SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -587,8 +598,10 @@ private: SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const; + bool canLoadStoreByteSwapped(EVT VT) const; SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; @@ -599,6 +612,8 @@ private: SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue unwrapAddress(SDValue N) const override; + // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. // CCMask and Target are the BRC-like operands for the branch. @@ -639,8 +654,27 @@ private: MachineBasicBlock *MBB, unsigned Opcode) const; + MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; }; + +struct SystemZVectorConstantInfo { +private: + APInt IntBits; // The 128 bits as an integer. + APInt SplatBits; // Smallest splat value. + APInt SplatUndef; // Bits correspoding to undef operands of the BVN. + unsigned SplatBitSize = 0; + bool isFP128 = false; + +public: + unsigned Opcode = 0; + SmallVector<unsigned, 2> OpVals; + MVT VecVT; + SystemZVectorConstantInfo(APFloat FPImm); + SystemZVectorConstantInfo(BuildVectorSDNode *BVN); + bool isVectorConstantLegal(const SystemZSubtarget &Subtarget); +}; + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h index 896b665d25eb..ec7639e71f81 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -1,9 +1,8 @@ //===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td index 08ab2d7bbc52..8d7a773ff4d9 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td @@ -1,9 +1,8 @@ //==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,7 +19,7 @@ //===----------------------------------------------------------------------===// // Load and test. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>; def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>; } @@ -32,25 +31,31 @@ let Defs = [CC] in { // Convert floating-point values to narrower representations. The destination // of LDXTR is a 128-bit value, but only the first register of the pair is used. -def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; -def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; +let Uses = [FPC] in { + def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; + def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; +} // Extend floating-point values to wider representations. -def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; -def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; +let Uses = [FPC] in { + def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; + def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; +} // Convert a signed integer value to a floating-point one. -def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; -def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; -let Predicates = [FeatureFPExtension] in { - def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; - def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; - def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; - def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; +let Uses = [FPC] in { + def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; + def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; + let Predicates = [FeatureFPExtension] in { + def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; + def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; + def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; + def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; + } } // Convert an unsigned integer value to a floating-point one. -let Predicates = [FeatureFPExtension] in { +let Uses = [FPC], Predicates = [FeatureFPExtension] in { def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>; def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>; def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>; @@ -58,7 +63,7 @@ let Predicates = [FeatureFPExtension] in { } // Convert a floating-point value to a signed integer value. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>; def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>; let Predicates = [FeatureFPExtension] in { @@ -70,7 +75,7 @@ let Defs = [CC] in { } // Convert a floating-point value to an unsigned integer value. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { let Predicates = [FeatureFPExtension] in { def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>; def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>; @@ -108,7 +113,7 @@ let Predicates = [FeatureDFPPackedConversion] in { } // Perform floating-point operation. -let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in +let Defs = [CC, R1L, F0Q], Uses = [FPC, R0L, F4Q] in def PFPO : SideEffectInherentE<"pfpo", 0x010A>; @@ -118,8 +123,10 @@ let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in // Round to an integer, with the second operand (M3) specifying the rounding // mode. M4 can be set to 4 to suppress detection of inexact conditions. -def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; -def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; +let Uses = [FPC] in { + def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; + def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; +} // Extract biased exponent. def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>; @@ -135,7 +142,7 @@ def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>; //===----------------------------------------------------------------------===// // Addition. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { let isCommutable = 1 in { def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>; def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>; @@ -147,7 +154,7 @@ let Defs = [CC] in { } // Subtraction. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>; def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>; let Predicates = [FeatureFPExtension] in { @@ -157,30 +164,38 @@ let Defs = [CC] in { } // Multiplication. -let isCommutable = 1 in { - def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; - def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; -} -let Predicates = [FeatureFPExtension] in { - def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; - def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; +let Uses = [FPC] in { + let isCommutable = 1 in { + def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; + def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; + } + let Predicates = [FeatureFPExtension] in { + def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; + def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; + } } // Division. -def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; -def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; -let Predicates = [FeatureFPExtension] in { - def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; - def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; +let Uses = [FPC] in { + def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; + def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; + let Predicates = [FeatureFPExtension] in { + def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; + def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; + } } // Quantize. -def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; -def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; +let Uses = [FPC] in { + def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; + def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; +} // Reround. -def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; -def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; +let Uses = [FPC] in { + def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; + def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; +} // Shift significand left/right. def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>; @@ -198,13 +213,13 @@ def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>; //===----------------------------------------------------------------------===// // Compare. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>; def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>; } // Compare and signal. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>; def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>; } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 1374ee91fa29..19c7ec58ed3d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -1,9 +1,8 @@ //==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -53,7 +52,8 @@ let isCodeGenOnly = 1 in // Moves between two floating-point registers that also set the condition // codes. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>; defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>; @@ -69,7 +69,8 @@ let Predicates = [FeatureNoVector] in { // Use a normal load-and-test for compare against zero in case of // vector support (via a pseudo to simplify instruction selection). -let Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>; def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>; def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>; @@ -174,56 +175,64 @@ let SimpleBDXStore = 1, mayStore = 1 in { // Convert floating-point values to narrower representations, rounding // according to the current mode. The destination of LEXBR and LDXBR // is a 128-bit value, but only the first register of the pair is used. -def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>; -def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; -def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; - -def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>, - Requires<[FeatureFPExtension]>; -def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, - Requires<[FeatureFPExtension]>; -def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, - Requires<[FeatureFPExtension]>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>; + def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; + def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; + + def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>, + Requires<[FeatureFPExtension]>; + def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, + Requires<[FeatureFPExtension]>; + def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, + Requires<[FeatureFPExtension]>; +} let Predicates = [FeatureNoVectorEnhancements1] in { - def : Pat<(f32 (fpround FP128:$src)), + def : Pat<(f32 (any_fpround FP128:$src)), (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>; - def : Pat<(f64 (fpround FP128:$src)), + def : Pat<(f64 (any_fpround FP128:$src)), (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; } // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>; + def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; + def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +} let Predicates = [FeatureNoVectorEnhancements1] in { - def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; - def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; + def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; + def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; } // Extend memory floating-point values to wider representations. -def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; -def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; -def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>; + def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; + def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +} let Predicates = [FeatureNoVectorEnhancements1] in { - def : Pat<(f128 (extloadf32 bdxaddr12only:$src)), + def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)), (LXEB bdxaddr12only:$src)>; - def : Pat<(f128 (extloadf64 bdxaddr12only:$src)), + def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)), (LXDB bdxaddr12only:$src)>; } // Convert a signed integer register value to a floating-point one. -def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; -def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; -def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; - -def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; -def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; -def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; + + def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; +} // The FP extension feature provides versions of the above that allow // specifying rounding mode and inexact-exception suppression flags. -let Predicates = [FeatureFPExtension] in { +let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in { def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>; def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>; def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>; @@ -235,13 +244,15 @@ let Predicates = [FeatureFPExtension] in { // Convert am unsigned integer register value to a floating-point one. let Predicates = [FeatureFPExtension] in { - def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; - def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>; - def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>; - - def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>; - def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>; - def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; + def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>; + def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>; + + def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>; + def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>; + def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>; + } def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>; def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>; @@ -254,7 +265,7 @@ let Predicates = [FeatureFPExtension] in { // Convert a floating-point register value to a signed integer value, // with the second operand (modifier M3) specifying the rounding mode. -let Defs = [CC] in { +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>; def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>; def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>; @@ -275,7 +286,8 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; // The FP extension feature provides versions of the above that allow // also specifying the inexact-exception suppression flag. -let Predicates = [FeatureFPExtension], Defs = [CC] in { +let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureFPExtension], Defs = [CC] in { def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>; def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>; def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>; @@ -287,7 +299,7 @@ let Predicates = [FeatureFPExtension], Defs = [CC] in { // Convert a floating-point register value to an unsigned integer value. let Predicates = [FeatureFPExtension] in { - let Defs = [CC] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>; def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>; def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>; @@ -353,59 +365,65 @@ let isCodeGenOnly = 1 in def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>; // Square root. -def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>; -def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>; -def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>; + def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>; + def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>; -def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>; -def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>; + def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>; + def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>; +} // Round to an integer, with the second operand (modifier M3) specifying // the rounding mode. These forms always check for inexact conditions. -def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>; -def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>; -def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>; + def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>; + def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>; +} // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. -def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; -def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; -def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; +def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>; let Predicates = [FeatureFPExtension] in { // Extended forms of the FIxBR instructions. M4 can be set to 4 // to suppress detection of inexact conditions. - def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>; - def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>; - def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>; + def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>; + def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>; + } // fnearbyint is like frint but does not detect inexact conditions. - def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; - def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; - def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; // floor is no longer allowed to raise an inexact condition, // so restrict it to the cases where the condition can be suppressed. // Mode 7 is round towards -inf. - def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; - def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; - def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; + def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; + def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; + def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; // Same idea for ceil, where mode 6 is round towards +inf. - def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; - def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; - def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; + def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; + def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; + def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; // Same idea for trunc, where mode 5 is round towards zero. - def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; - def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; - def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; + def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; + def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; // Same idea for round, where mode 1 is round towards nearest with // ties away from zero. - def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>; - def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>; - def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>; + def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>; + def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>; + def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>; } //===----------------------------------------------------------------------===// @@ -413,87 +431,103 @@ let Predicates = [FeatureFPExtension] in { //===----------------------------------------------------------------------===// // Addition. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { let isCommutable = 1 in { - def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>; - def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>; - def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>; + def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>; } - def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; - def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; + def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>; + def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>; } // Subtraction. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>; - def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>; - def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>; - - def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; - def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; + + def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>; } // Multiplication. -let isCommutable = 1 in { - def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; - def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; - def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1 in { + let isCommutable = 1 in { + def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>; + } + def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>; + def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>; } -def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; -def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; // f64 multiplication of two FP32 registers. -def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; -def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))), +let Uses = [FPC], mayRaiseFPException = 1 in + def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), + (f64 (fpextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. -def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; -def : Pat<(fmul (f64 (fpextend FP32:$src1)), - (f64 (extloadf32 bdxaddr12only:$addr))), +let Uses = [FPC], mayRaiseFPException = 1 in + def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; +def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), + (f64 (extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. -def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +let Uses = [FPC], mayRaiseFPException = 1 in + def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), + (f128 (fpextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. -def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; +let Uses = [FPC], mayRaiseFPException = 1 in + def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), + def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>; + def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>; + def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; + def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; +} // Fused multiply-subtract. -def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>; + def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>; + def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; + def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; +} // Division. -def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; -def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; -def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>; + def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>; + def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; -def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; -def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; + def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>; + def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; +} // Divide to integer. -let Defs = [CC] in { +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>; def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>; } @@ -502,7 +536,7 @@ let Defs = [CC] in { // Comparisons //===----------------------------------------------------------------------===// -let Defs = [CC], CCValues = 0xF in { +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>; def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>; def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>; @@ -532,20 +566,28 @@ let Defs = [CC], CCValues = 0xC in { let hasSideEffects = 1 in { let mayLoad = 1, mayStore = 1 in { // TODO: EFPC and SFPC do not touch memory at all - def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; - def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>; - - def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; - def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>; + let Uses = [FPC] in { + def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; + def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>; + } + + let Defs = [FPC] in { + def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; + def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>; + } } - def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; - def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; + let Defs = [FPC], mayRaiseFPException = 1 in { + def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; + def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; + } - def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>, - Requires<[FeatureFPExtension]>; - def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>; - def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>; + let Uses = [FPC], Defs = [FPC] in { + def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>, + Requires<[FeatureFPExtension]>; + def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>; + def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>; + } } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 1e904a86ea79..2a1d14de3ddf 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1,9 +1,8 @@ //==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -38,6 +37,12 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, string OpKey = ""; string OpType = "none"; + // MemKey identifies a targe reg-mem opcode, while MemType can be either + // "pseudo" or "target". This is used to map a pseduo memory instruction to + // its corresponding target opcode. See comment at MemFoldPseudo. + string MemKey = ""; + string MemType = "none"; + // Many distinct-operands instructions have older 2-operand equivalents. // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, // with NumOpsValue being "2" or "3" as appropriate. @@ -121,7 +126,8 @@ def getDisp20Opcode : InstrMapping { let ValueCols = [["20"]]; } -// Return the memory form of a register instruction. +// Return the memory form of a register instruction. Note that this may +// return a MemFoldPseudo instruction (see below). def getMemOpcode : InstrMapping { let FilterClass = "InstSystemZ"; let RowFields = ["OpKey"]; @@ -130,13 +136,22 @@ def getMemOpcode : InstrMapping { let ValueCols = [["mem"]]; } -// Return the 3-operand form of a 2-operand instruction. -def getThreeOperandOpcode : InstrMapping { +// Return the target memory instruction for a MemFoldPseudo. +def getTargetMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["MemKey"]; + let ColFields = ["MemType"]; + let KeyCol = ["pseudo"]; + let ValueCols = [["target"]]; +} + +// Return the 2-operand form of a 3-operand instruction. +def getTwoOperandOpcode : InstrMapping { let FilterClass = "InstSystemZ"; let RowFields = ["NumOpsKey"]; let ColFields = ["NumOpsValue"]; - let KeyCol = ["2"]; - let ValueCols = [["3"]]; + let KeyCol = ["3"]; + let ValueCols = [["2"]]; } //===----------------------------------------------------------------------===// @@ -1399,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> bits<4> R1; bits<5> V2; bits<4> M3; + bits<4> M4; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-32} = V2{3-0}; let Inst{31-24} = 0; let Inst{23-20} = M3; - let Inst{19-12} = 0; + let Inst{19-16} = M4; + let Inst{15-12} = 0; let Inst{11} = 0; let Inst{10} = V2{4}; let Inst{9-8} = 0; @@ -2410,11 +2427,16 @@ class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls> let mayLoad = 1; } -class LoadMultipleVRSa<string mnemonic, bits<16> opcode> - : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2), - mnemonic#"\t$V1, $V3, $BD2", []> { - let M4 = 0; - let mayLoad = 1; +multiclass LoadMultipleVRSaAlign<string mnemonic, bits<16> opcode> { + let mayLoad = 1 in { + def Align : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), + (ins bdaddr12only:$BD2, imm32zx4:$M4), + mnemonic#"\t$V1, $V3, $BD2, $M4", []>; + let M4 = 0 in + def "" : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), + (ins bdaddr12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", []>; + } } class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, @@ -2469,12 +2491,29 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes, bits<4> type = 0> : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2), mnemonic#"\t$V1, $XBD2", - [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> { + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> { let M3 = type; let mayStore = 1; let AccessBytes = bytes; } +class StoreVRXGeneric<string mnemonic, bits<16> opcode> + : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []> { + let mayStore = 1; +} + +multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> { + let mayStore = 1, AccessBytes = 16 in { + def Align : InstVRX<opcode, (outs), + (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []>; + let M3 = 0 in + def "" : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2), + mnemonic#"\t$V1, $XBD2", []>; + } +} + class StoreLengthVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, bits<5> bytes> : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2), @@ -2527,11 +2566,16 @@ multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode, } } -class StoreMultipleVRSa<string mnemonic, bits<16> opcode> - : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2), - mnemonic#"\t$V1, $V3, $BD2", []> { - let M4 = 0; - let mayStore = 1; +multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> { + let mayStore = 1 in { + def Align : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, + bdaddr12only:$BD2, imm32zx4:$M4), + mnemonic#"\t$V1, $V3, $BD2, $M4", []>; + let M4 = 0 in + def "" : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, + bdaddr12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", []>; + } } // StoreSI* instructions are used to store an integer to memory, but the @@ -2925,6 +2969,17 @@ class UnaryVRXGeneric<string mnemonic, bits<16> opcode> let mayLoad = 1; } +multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> { + let mayLoad = 1, AccessBytes = 16 in { + def Align : InstVRX<opcode, (outs VR128:$V1), + (ins bdxaddr12only:$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []>; + let M3 = 0 in + def "" : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$V1, $XBD2", []>; + } +} + class SideEffectBinaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), @@ -3067,6 +3122,8 @@ class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator, mnemonic#"\t$R1, $R2, $R3", [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> { let M4 = 0; + let OpKey = mnemonic#cls1; + let OpType = "reg"; } multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, @@ -3074,9 +3131,9 @@ multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, RegisterOperand cls2> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>, + def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>; } } @@ -3086,9 +3143,9 @@ multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2, RegisterOperand cls2> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>, + def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>; } } @@ -3102,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M4 = 0; } +class BinaryRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []>; + class BinaryMemRRFc<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, Immediate imm> : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3), @@ -3169,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode, def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>; } +class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), + (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), + mnemonic#"$M4\t$R1, $R2, $R3", + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, + cond4:$valid, cond4:$M4))]> { + let CCMaskLast = 1; +} + +// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $R3, $M4", []>; + +// Like CondBinaryRRFa, but with a fixed CC mask. +class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2), + mnemonic#V.suffix#"\t$R1, $R2, $R3", []> { + let isAsmParserOnly = V.alternate; + let M4 = V.ccmask; +} + +multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; +} + class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -3189,9 +3286,9 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, Immediate imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>, + def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; } } @@ -3266,9 +3363,9 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>, + def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRS<mnemonic, opcode1, operator, cls>; } } @@ -3563,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator, class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3), - mnemonic#"\t$R1, $V2, $M3", []>; + mnemonic#"\t$R1, $V2, $M3", []> { + let M4 = 0; +} class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type> @@ -3941,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode, let M4 = 0; } +class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1, cls2:$R2), + (ins cls1:$R1src, cls2:$R2src, cls3:$R3), + mnemonic#"\t$R1, $R2, $R3", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + let M4 = 0; +} + class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls3> @@ -4229,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode> mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>; class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, bits<4> type = 0> + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0> : InstVRRd<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), mnemonic#"\t$V1, $V2, $V3, $V4", @@ -4237,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, (tr2.vt tr2.op:$V3), (tr1.vt tr1.op:$V4)))]> { let M5 = type; - let M6 = 0; + let M6 = m6; } class TernaryVRRdGeneric<string mnemonic, bits<16> opcode> @@ -4247,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode> let M6 = 0; } +// Ternary operation where the assembler mnemonic has an extra operand to +// optionally allow specifiying arbitrary M6 values. +multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> { + let M5 = type, Defs = [CC] in + def "" : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>; + def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4)), + (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr1.op:$V4, 0)>; +} + +multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> { + let Defs = [CC] in + def "" : InstVRRd<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, VR128:$V4, + imm32zx4:$M5, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, + VR128:$V4, imm32zx4:$M5, 0)>; +} + class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> : InstVRRe<opcode, (outs tr1.op:$V1), @@ -4277,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M4 = type; } +class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, + imm32zx4:$M3, imm32zx4:$M4), + mnemonic#"\t$R1, $V2, $M3, $M4", []>; + class TernaryVRSbGeneric<string mnemonic, bits<16> opcode> : InstVRSb<opcode, (outs VR128:$V1), (ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4), @@ -4594,14 +4737,31 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator, RegisterOperand cls, Immediate imm> { let NumOpsKey = key in { let NumOpsValue = "3" in - def K : BinaryRIEPseudo<null_frag, cls, imm>, + def K : BinaryRIEPseudo<operator, cls, imm>, Requires<[FeatureHighWord, FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRIPseudo<operator, cls, imm>, Requires<[FeatureHighWord]>; } } +// A pseudo that is used during register allocation when folding a memory +// operand. The 3-address register instruction with a spilled source cannot +// be converted directly to a target 2-address reg/mem instruction. +// Mapping: <INSN>R -> MemFoldPseudo -> <INSN> +class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> { + let OpKey = mnemonic#"rk"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let HasIndex = 1; + let hasNoSchedulingInfo = 1; +} + // Like CompareRI, but expanded after RA depending on the choice of register. class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -4639,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2> let CCMaskLast = 1; } +// Like CondBinaryRRFa, but expanded after RA depending on the choice of +// register. +class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : Pseudo<(outs cls1:$R1), + (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, + cond4:$valid, cond4:$M4))]> { + let CCMaskLast = 1; +} + // Like CondBinaryRIE, but expanded after RA depending on the choice of // register. class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm> @@ -4776,58 +4947,6 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator> class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm> : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>; -// Define an instruction that operates on two fixed-length blocks of memory, -// and associated pseudo instructions for operating on blocks of any size. -// The Sequence form uses a straight-line sequence of instructions and -// the Loop form uses a loop of length-256 instructions followed by -// another instruction to handle the excess. -multiclass MemorySS<string mnemonic, bits<8> opcode, - SDPatternOperator sequence, SDPatternOperator loop> { - def "" : SideEffectBinarySSa<mnemonic, opcode>; - let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in { - def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length), - [(sequence bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length)]>; - def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length, GR64:$count256), - [(loop bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length, GR64:$count256)]>; - } -} - -// The same, but setting a CC result as comparion operator. -multiclass CompareMemorySS<string mnemonic, bits<8> opcode, - SDPatternOperator sequence, SDPatternOperator loop> { - def "" : SideEffectBinarySSa<mnemonic, opcode>; - let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { - def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length), - [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length))]>; - def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length, GR64:$count256), - [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src, - imm64:$length, GR64:$count256))]>; - } -} - -// Define an instruction that operates on two strings, both terminated -// by the character in R0. The instruction processes a CPU-determinated -// number of bytes at a time and sets CC to 3 if the instruction needs -// to be repeated. Also define a pseudo instruction that represents -// the full loop (the main instruction plus the branch on CC==3). -multiclass StringRRE<string mnemonic, bits<16> opcode, - SDPatternOperator operator> { - let Uses = [R0L] in - def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>; - let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in - def Loop : Pseudo<(outs GR64:$end), - (ins GR64:$start1, GR64:$start2, GR32:$char), - [(set GR64:$end, (operator GR64:$start1, GR64:$start2, - GR32:$char))]>; -} - // A pseudo instruction that is a direct alias of a real instruction. // These aliases are used in cases where a particular register operand is // fixed or where the same instruction is used with different register sizes. @@ -4893,3 +5012,90 @@ class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2> imm32zx6:$I5), []> { let Constraints = "$R1 = $R1src"; } + +//===----------------------------------------------------------------------===// +// Multiclasses that emit both real and pseudo instructions +//===----------------------------------------------------------------------===// + +multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> { + + def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> { + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let Has20BitOffset = 1 in + def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>; +} + +multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode, + bits<16> rxyOpcode, SDPatternOperator operator, + RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, + bdxaddr12pair> { + let DispSize = "12"; + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let DispSize = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, + bytes, bdxaddr20pair>; + } + def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>; +} + +// Define an instruction that operates on two fixed-length blocks of memory, +// and associated pseudo instructions for operating on blocks of any size. +// The Sequence form uses a straight-line sequence of instructions and +// the Loop form uses a loop of length-256 instructions followed by +// another instruction to handle the excess. +multiclass MemorySS<string mnemonic, bits<8> opcode, + SDPatternOperator sequence, SDPatternOperator loop> { + def "" : SideEffectBinarySSa<mnemonic, opcode>; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in { + def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(sequence bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length)]>; + def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256), + [(loop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256)]>; + } +} + +// The same, but setting a CC result as comparion operator. +multiclass CompareMemorySS<string mnemonic, bits<8> opcode, + SDPatternOperator sequence, SDPatternOperator loop> { + def "" : SideEffectBinarySSa<mnemonic, opcode>; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { + def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length))]>; + def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256), + [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256))]>; + } +} + +// Define an instruction that operates on two strings, both terminated +// by the character in R0. The instruction processes a CPU-determinated +// number of bytes at a time and sets CC to 3 if the instruction needs +// to be repeated. Also define a pseudo instruction that represents +// the full loop (the main instruction plus the branch on CC==3). +multiclass StringRRE<string mnemonic, bits<16> opcode, + SDPatternOperator operator> { + let Uses = [R0L] in + def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in + def Loop : Pseudo<(outs GR64:$end), + (ins GR64:$start1, GR64:$start2, GR32:$char), + [(set GR64:$end, (operator GR64:$start1, GR64:$start2, + GR32:$char))]>; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td index 6d5b4b92f650..2e3c9932d621 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td @@ -1,9 +1,8 @@ //==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 8aab5c2c4999..57c1cf4ec70a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1,9 +1,8 @@ //===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -169,11 +168,13 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, if (!DestIsHigh && !SrcIsHigh) MI.setDesc(get(LowOpcodeK)); else { - emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg, - SystemZ::LR, 32, MI.getOperand(1).isKill(), - MI.getOperand(1).isUndef()); + if (DestReg != SrcReg) { + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg, + SystemZ::LR, 32, MI.getOperand(1).isKill(), + MI.getOperand(1).isUndef()); + MI.getOperand(1).setReg(DestReg); + } MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); - MI.getOperand(1).setReg(DestReg); MI.tieOperands(0, 1); } } @@ -222,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, // correctly. This change is defered to the SystemZExpandPseudo pass. } +// MI is a select pseudo instruction. Replace it with LowOpcode if source +// and destination are all low GR32s and HighOpcode if source and destination +// are all high GR32s. Otherwise, use the two-operand MixedOpcode. +void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode, + unsigned MixedOpcode) const { + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Src1Reg = MI.getOperand(1).getReg(); + unsigned Src2Reg = MI.getOperand(2).getReg(); + bool DestIsHigh = isHighReg(DestReg); + bool Src1IsHigh = isHighReg(Src1Reg); + bool Src2IsHigh = isHighReg(Src2Reg); + + // If sources and destination aren't all high or all low, we may be able to + // simplify the operation by moving one of the sources to the destination + // first. But only if this doesn't clobber the other source. + if (DestReg != Src1Reg && DestReg != Src2Reg) { + if (DestIsHigh != Src1IsHigh) { + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg, + SystemZ::LR, 32, MI.getOperand(1).isKill(), + MI.getOperand(1).isUndef()); + MI.getOperand(1).setReg(DestReg); + Src1Reg = DestReg; + Src1IsHigh = DestIsHigh; + } else if (DestIsHigh != Src2IsHigh) { + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg, + SystemZ::LR, 32, MI.getOperand(2).isKill(), + MI.getOperand(2).isUndef()); + MI.getOperand(2).setReg(DestReg); + Src2Reg = DestReg; + Src2IsHigh = DestIsHigh; + } + } + + // If the destination (now) matches one source, prefer this to be first. + if (DestReg != Src1Reg && DestReg == Src2Reg) { + commuteInstruction(MI, false, 1, 2); + std::swap(Src1Reg, Src2Reg); + std::swap(Src1IsHigh, Src2IsHigh); + } + + if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) + MI.setDesc(get(LowOpcode)); + else if (DestIsHigh && Src1IsHigh && Src2IsHigh) + MI.setDesc(get(HighOpcode)); + else { + // Given the simplifcation above, we must already have a two-operand case. + assert (DestReg == Src1Reg); + MI.setDesc(get(MixedOpcode)); + MI.tieOperands(0, 1); + LOCRMuxJumps++; + } + + // If we were unable to implement the pseudo with a single instruction, we + // need to convert it back into a branch sequence. This cannot be done here + // since the caller of expandPostRAPseudo does not handle changes to the CFG + // correctly. This change is defered to the SystemZExpandPseudo pass. +} + // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. @@ -311,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, }; switch (MI.getOpcode()) { + case SystemZ::SELRMux: + case SystemZ::SELFHR: + case SystemZ::SELR: + case SystemZ::SELGR: case SystemZ::LOCRMux: case SystemZ::LOCFHR: case SystemZ::LOCR: @@ -605,7 +669,9 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, unsigned Opc; if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) { - if (STI.hasLoadStoreOnCond2()) + if (STI.hasMiscellaneousExtensions3()) + Opc = SystemZ::SELRMux; + else if (STI.hasLoadStoreOnCond2()) Opc = SystemZ::LOCRMux; else { Opc = SystemZ::LOCR; @@ -617,9 +683,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, TrueReg = TReg; FalseReg = FReg; } - } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) - Opc = SystemZ::LOCGR; - else + } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) { + if (STI.hasMiscellaneousExtensions3()) + Opc = SystemZ::SELGR; + else + Opc = SystemZ::LOCGR; + } else llvm_unreachable("Invalid register class"); BuildMI(MBB, I, DL, get(Opc), DstReg) @@ -642,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned NewUseOpc; unsigned UseIdx; int CommuteIdx = -1; + bool TieOps = false; switch (UseOpc) { + case SystemZ::SELRMux: + TieOps = true; + LLVM_FALLTHROUGH; case SystemZ::LOCRMux: if (!STI.hasLoadStoreOnCond2()) return false; @@ -654,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, else return false; break; + case SystemZ::SELGR: + TieOps = true; + LLVM_FALLTHROUGH; case SystemZ::LOCGR: if (!STI.hasLoadStoreOnCond2()) return false; @@ -675,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, bool DeleteDef = MRI->hasOneNonDBGUse(Reg); UseMI.setDesc(get(NewUseOpc)); + if (TieOps) + UseMI.tieOperands(0, 1); UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); if (DeleteDef) DefMI.eraseFromParent(); @@ -958,73 +1036,13 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) { } } -// Used to return from convertToThreeAddress after replacing two-address -// instruction OldMI with three-address instruction NewMI. -static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, - MachineInstr *NewMI, - LiveVariables *LV) { - if (LV) { - unsigned NumOps = OldMI->getNumOperands(); - for (unsigned I = 1; I < NumOps; ++I) { - MachineOperand &Op = OldMI->getOperand(I); - if (Op.isReg() && Op.isKill()) - LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI); - } - } - transferDeadCC(OldMI, NewMI); - return NewMI; -} - MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - - // Try to convert something like SLL into SLLK, if supported. - // We prefer to keep the two-operand form where possible both - // because it tends to be shorter and because some instructions - // have memory forms that can be used during spilling. - if (STI.hasDistinctOps()) { - MachineOperand &Dest = MI.getOperand(0); - MachineOperand &Src = MI.getOperand(1); - unsigned DestReg = Dest.getReg(); - unsigned SrcReg = Src.getReg(); - // AHIMux is only really a three-operand instruction when both operands - // are low registers. Try to constrain both operands to be low if - // possible. - if (Opcode == SystemZ::AHIMux && - TargetRegisterInfo::isVirtualRegister(DestReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg) && - MRI.getRegClass(DestReg)->contains(SystemZ::R1L) && - MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) { - MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass); - MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass); - } - int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); - if (ThreeOperandOpcode >= 0) { - // Create three address instruction without adding the implicit - // operands. Those will instead be copied over from the original - // instruction by the loop below. - MachineInstrBuilder MIB( - *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(), - /*NoImplicit=*/true)); - MIB.add(Dest); - // Keep the kill state, but drop the tied flag. - MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); - // Keep the remaining operands as-is. - for (unsigned I = 2; I < NumOps; ++I) - MIB.add(MI.getOperand(I)); - MBB->insert(MI, MIB); - return finishConvertToThreeAddress(&MI, MIB, LV); - } - } // Try to convert an AND into an RISBG-type instruction. - if (LogicOp And = interpretAndImmediate(Opcode)) { + // TODO: It might be beneficial to select RISBG and shorten to AND instead. + if (LogicOp And = interpretAndImmediate(MI.getOpcode())) { uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB; // AND IMMEDIATE leaves the other bits of the register unchanged. Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); @@ -1052,7 +1070,16 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress( .addImm(Start) .addImm(End + 128) .addImm(0); - return finishConvertToThreeAddress(&MI, MIB, LV); + if (LV) { + unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = MI.getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), MI, *MIB); + } + } + transferDeadCC(&MI, MIB); + return MIB; } } return nullptr; @@ -1061,7 +1088,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); @@ -1189,7 +1216,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( // MVCs that turn out to be redundant. if (OpNum == 0 && MI.hasOneMemOperand()) { MachineMemOperand *MMO = *MI.memoperands_begin(); - if (MMO->getSize() == Size && !MMO->isVolatile()) { + if (MMO->getSize() == Size && !MMO->isVolatile() && !MMO->isAtomic()) { // Handle conversion of loads. if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) { return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), @@ -1215,12 +1242,37 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( } } - // If the spilled operand is the final one, try to change <INSN>R - // into <INSN>. + // If the spilled operand is the final one or the instruction is + // commutable, try to change <INSN>R into <INSN>. + unsigned NumOps = MI.getNumExplicitOperands(); int MemOpcode = SystemZ::getMemOpcode(Opcode); + + // See if this is a 3-address instruction that is convertible to 2-address + // and suitable for folding below. Only try this with virtual registers + // and a provided VRM (during regalloc). + bool NeedsCommute = false; + if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) { + if (VRM == nullptr) + MemOpcode = -1; + else { + assert(NumOps == 3 && "Expected two source registers."); + Register DstReg = MI.getOperand(0).getReg(); + Register DstPhys = + (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); + Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() + : ((OpNum == 1 && MI.isCommutable()) + ? MI.getOperand(2).getReg() + : Register())); + if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && + TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg)) + NeedsCommute = (OpNum == 1); + else + MemOpcode = -1; + } + } + if (MemOpcode >= 0) { - unsigned NumOps = MI.getNumExplicitOperands(); - if (OpNum == NumOps - 1) { + if ((OpNum == NumOps - 1) || NeedsCommute) { const MCInstrDesc &MemDesc = get(MemOpcode); uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); assert(AccessBytes != 0 && "Size of access should be known"); @@ -1228,8 +1280,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( uint64_t Offset = Size - AccessBytes; MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(MemOpcode)); - for (unsigned I = 0; I < OpNum; ++I) - MIB.add(MI.getOperand(I)); + MIB.add(MI.getOperand(0)); + if (NeedsCommute) + MIB.add(MI.getOperand(2)); + else + for (unsigned I = 1; I < OpNum; ++I) + MIB.add(MI.getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); @@ -1306,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); return true; + case SystemZ::SELRMux: + expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR, + SystemZ::LOCRMux); + return true; + case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1432,7 +1493,7 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { - if (MI.getOpcode() == TargetOpcode::INLINEASM) { + if (MI.isInlineAsm()) { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); @@ -1783,7 +1844,8 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, } bool SystemZInstrInfo:: -areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, +areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, + const MachineInstr &MIb, AliasAnalysis *AA) const { if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 0392430ed872..2edde175542e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -1,9 +1,8 @@ //===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -142,6 +141,11 @@ enum FusedCompareType { } // end namespace SystemZII +namespace SystemZ { +int getTwoOperandOpcode(uint16_t Opcode); +int getTargetMemOpcode(uint16_t Opcode); +} + class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; SystemZSubtarget &STI; @@ -158,6 +162,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { unsigned HighOpcode) const; void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; + void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode, unsigned MixedOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -249,7 +255,8 @@ public: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, @@ -314,7 +321,8 @@ public: // addresses. This function returns true if two MIs access different // memory addresses and false otherwise. bool - areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, + areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, + const MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; }; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 8d3b1011d0a7..91856893e3bd 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1,9 +1,8 @@ //===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -256,7 +255,7 @@ let isCall = 1, Defs = [CC] in { } // Regular calls. -let isCall = 1, Defs = [R14D, CC] in { +let isCall = 1, Defs = [R14D, CC], Uses = [FPC] in { def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops), [(z_call pcrel32:$I2)]>; def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), @@ -362,9 +361,6 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store, //===----------------------------------------------------------------------===// // Register moves. -// Expands to LR, RISBHG or RISBLG, depending on the choice of registers. -def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>, - Requires<[FeatureHighWord]>; def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; @@ -478,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in { def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>; } +// Move right. +let Predicates = [FeatureMiscellaneousExtensions3], + mayLoad = 1, mayStore = 1, Uses = [R0L] in + def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>; + // String moves. let mayLoad = 1, mayStore = 1, Defs = [CC] in defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; @@ -486,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in // Conditional move instructions //===----------------------------------------------------------------------===// +let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in { + // Select. + let isCommutable = 1 in { + // Expands to SELR or SELFHR or a branch-and-move sequence, + // depending on the choice of registers. + def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>; + defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>; + defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>; + defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>; + } + + // Define AsmParser extended mnemonics for each general condition-code mask. + foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0, + GR32, GR32, GR32>; + def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0, + GRH32, GRH32, GRH32>; + def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3, + GR64, GR64, GR64>; + } +} + let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { // Load immediate on condition. Matched via DAG pattern and created // by the PeepholeOptimizer via FoldImmediate. @@ -920,11 +944,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of memory. defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>; - defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; + defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>; - def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>; + defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>; // Addition to memory. def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; @@ -962,9 +986,9 @@ let Defs = [CC] in { Requires<[FeatureHighWord]>; // Addition of memory. - defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; + defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>; - def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>; + defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>; // Addition to memory. def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; @@ -1007,11 +1031,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>; - defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; + defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>; - def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>; + defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>; } defm : SXB<z_ssub, GR64, SGFR>; @@ -1033,6 +1057,14 @@ let AddedComplexity = 1 in { (AGFI GR64:$src1, imm64sx32n:$src2)>; } +// And vice versa in one special case, where we need to load a +// constant into a register in any case, but the negated constant +// requires fewer instructions to load. +def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2), + (SGR GR64:$src1, (LLILH imm64lh16n:$src2))>; +def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2), + (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>; + // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. @@ -1051,9 +1083,9 @@ let Defs = [CC] in { def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>; // Subtraction of memory. - defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; + defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>; - def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>; + defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>; } defm : ZXB<z_usub, GR64, SLGFR>; @@ -1128,8 +1160,8 @@ let Defs = [CC] in { // ANDs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>; + defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>; } // AND to memory @@ -1185,8 +1217,8 @@ let Defs = [CC] in { // ORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>; + defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>; } // OR to memory @@ -1225,8 +1257,8 @@ let Defs = [CC] in { // XORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>; + defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>; } // XOR to memory @@ -1240,6 +1272,43 @@ defm : RMWIByte<xor, bdaddr12pair, XI>; defm : RMWIByte<xor, bdaddr20pair, XIY>; //===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureMiscellaneousExtensions3], + Defs = [CC] in { + // AND with complement. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>; + def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>; + } + + // OR with complement. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>; + def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>; + } + + // NAND. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>; + def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>; + } + + // NOR. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>; + def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>; + } + + // NXOR. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>; + def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>; + } +} + +//===----------------------------------------------------------------------===// // Multiplication //===----------------------------------------------------------------------===// @@ -1833,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { let Predicates = [FeatureMessageSecurityAssist8] in def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929, GR128, GR128, GR128>; + + let Predicates = [FeatureMessageSecurityAssist9] in + def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>; } //===----------------------------------------------------------------------===// @@ -2013,7 +2085,12 @@ let Defs = [CC] in def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; -// Population count. Counts bits set per byte. +// Population count. Counts bits set per byte or doubleword. +let Predicates = [FeatureMiscellaneousExtensions3] in { + let Defs = [CC] in + def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>; + def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>; +} let Predicates = [FeaturePopulationCount], Defs = [CC] in def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>; @@ -2044,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>; +// Sort lists. +let Predicates = [FeatureEnhancedSort], + mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in + def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>; + +// Deflate conversion call. +let Predicates = [FeatureDeflateConversion], + mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in + def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939, + GR128, GR128, GR64>; + // Execute. let hasSideEffects = 1 in { def EX : SideEffectBinaryRX<"ex", 0x44, GR64>; @@ -2186,6 +2274,22 @@ let AddedComplexity = 4 in { (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; } +// Substitute (x*64-s) with (-s), since shift/rotate instructions only +// use the last 6 bits of the second operand register (making it modulo 64). +let AddedComplexity = 4 in { + def : Pat<(shl GR64:$val, (sub imm32mod64, GR32:$shift)), + (SLLG GR64:$val, (LCR GR32:$shift), 0)>; + + def : Pat<(sra GR64:$val, (sub imm32mod64, GR32:$shift)), + (SRAG GR64:$val, (LCR GR32:$shift), 0)>; + + def : Pat<(srl GR64:$val, (sub imm32mod64, GR32:$shift)), + (SRLG GR64:$val, (LCR GR32:$shift), 0)>; + + def : Pat<(rotl GR64:$val, (sub imm32mod64, GR32:$shift)), + (RLLG GR64:$val, (LCR GR32:$shift), 0)>; +} + // Peepholes for turning scalar operations into block operations. defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence, XCSequence, 1>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td index c351577fa5bd..ecce16c9cd73 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td @@ -1,9 +1,8 @@ //==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 6c97b85277c3..261727f89058 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1,9 +1,8 @@ //==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -104,7 +103,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVector] in { // Load. - def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>; + defm VL : UnaryVRXAlign<"vl", 0xE706>; // Load to block boundary. The number of loaded bytes is only known // at run time. The instruction is really polymorphic, but v128b matches @@ -123,7 +122,7 @@ let Predicates = [FeatureVector] in { def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; // Load multiple. - def VLM : LoadMultipleVRSa<"vlm", 0xE736>; + defm VLM : LoadMultipleVRSaAlign<"vlm", 0xE736>; // Load and replicate def VLREP : UnaryVRXGeneric<"vlrep", 0xE705>; @@ -208,13 +207,13 @@ defm : ReplicatePeephole<VLREPG, v2f64, load, f64>; let Predicates = [FeatureVector] in { // Store. - def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>; + defm VST : StoreVRXAlign<"vst", 0xE70E>; // Store with length. The number of stored bytes is only known at run time. def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>; // Store multiple. - def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>; + defm VSTM : StoreMultipleVRSaAlign<"vstm", 0xE73E>; // Store element. def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>; @@ -250,6 +249,81 @@ let Predicates = [FeatureVectorPackedDecimal] in { } //===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements2] in { + // Load byte-reversed elements. + def VLBR : UnaryVRXGeneric<"vlbr", 0xE606>; + def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>; + def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>; + def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>; + def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>; + + // Load elements reversed. + def VLER : UnaryVRXGeneric<"vler", 0xE607>; + def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>; + def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>; + def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>; + def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)), + (VLERF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)), + (VLERG bdxaddr12only:$addr)>; + def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)), + (VLBRQ bdxaddr12only:$addr)>; + + // Load byte-reversed element. + def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>; + def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>; + def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>; + + // Load byte-reversed element and zero. + def VLLEBRZ : UnaryVRXGeneric<"vllebrz", 0xE604>; + def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>; + def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>; + def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>; + def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>; + def : InstAlias<"lerv\t$V1, $XBD2", + (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>; + def : InstAlias<"ldrv\t$V1, $XBD2", + (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>; + + // Load byte-reversed element and replicate. + def VLBRREP : UnaryVRXGeneric<"vlbrrep", 0xE605>; + def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>; + def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>; + def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>; + + // Store byte-reversed elements. + def VSTBR : StoreVRXGeneric<"vstbr", 0xE60E>; + def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>; + def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>; + def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>; + def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>; + + // Store elements reversed. + def VSTER : StoreVRXGeneric<"vster", 0xE60F>; + def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>; + def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>; + def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>; + def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr), + (VSTERF VR128:$val, bdxaddr12only:$addr)>; + def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr), + (VSTERG VR128:$val, bdxaddr12only:$addr)>; + def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr), + (VSTBRQ VR128:$val, bdxaddr12only:$addr)>; + + // Store byte-reversed element. + def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>; + def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>; + def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>; + def : InstAlias<"sterv\t$V1, $XBD2", + (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>; + def : InstAlias<"stdrv\t$V1, $XBD2", + (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>; +} + +//===----------------------------------------------------------------------===// // Selects and permutes //===----------------------------------------------------------------------===// @@ -707,6 +781,10 @@ let Predicates = [FeatureVector] in { def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; + // Shift left double by bit. + let Predicates = [FeatureVectorEnhancements2] in + def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>; + // Shift right arithmetic. def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; @@ -719,6 +797,10 @@ let Predicates = [FeatureVector] in { // Shift right logical by byte. def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; + // Shift right double by bit. + let Predicates = [FeatureVectorEnhancements2] in + def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>; + // Subtract. def VS : BinaryVRRcGeneric<"vs", 0xE7F7>; def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; @@ -925,126 +1007,190 @@ let Predicates = [FeatureVector] in { // See comments in SystemZInstrFP.td for the suppression flags and // rounding modes. multiclass VectorRounding<Instruction insn, TypedReg tr> { - def : FPConversion<insn, frint, tr, tr, 0, 0>; - def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>; - def : FPConversion<insn, ffloor, tr, tr, 4, 7>; - def : FPConversion<insn, fceil, tr, tr, 4, 6>; - def : FPConversion<insn, ftrunc, tr, tr, 4, 5>; - def : FPConversion<insn, fround, tr, tr, 4, 1>; + def : FPConversion<insn, any_frint, tr, tr, 0, 0>; + def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>; + def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>; + def : FPConversion<insn, any_fceil, tr, tr, 4, 6>; + def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>; + def : FPConversion<insn, any_fround, tr, tr, 4, 1>; } let Predicates = [FeatureVector] in { // Add. - def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; - def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; - def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; - def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; - def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>; + } } - // Convert from fixed 64-bit. - def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; - def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; - def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + // Convert from fixed. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; + def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; + def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + } def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>; + def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>; + def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>; + } + def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>; + } - // Convert from logical 64-bit. - def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; - def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; - def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + // Convert from logical. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; + def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; + def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + } def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>; + def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>; + def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>; + } + def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>; + } - // Convert to fixed 64-bit. - def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; - def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; - def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + // Convert to fixed. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; + def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; + def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + } // Rounding mode should agree with SystemZInstrFP.td. def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>; + def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>; + def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>; + } - // Convert to logical 64-bit. - def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; - def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; - def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + // Convert to logical. + let Uses = [FPC], mayRaiseFPException = 1 in { + def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; + def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; + def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + } // Rounding mode should agree with SystemZInstrFP.td. def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>; + def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>; + def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>; + } // Divide. - def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; - def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; - def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; - def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; - def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>; + } } // Load FP integer. - def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; - def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; - def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; + def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; + def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + } defm : VectorRounding<VFIDB, v128db>; defm : VectorRounding<WFIDB, v64db>; let Predicates = [FeatureVectorEnhancements1] in { - def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; - def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; - def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; + def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; + } defm : VectorRounding<VFISB, v128sb>; defm : VectorRounding<WFISB, v32sb>; defm : VectorRounding<WFIXB, v128xb>; } // Load lengthened. - def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; - def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>; + } let Predicates = [FeatureVectorEnhancements1] in { - let isAsmParserOnly = 1 in { - def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; - def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; - def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in { + def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; + def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; + def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + } + def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>; } - def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>; - def : Pat<(f128 (fpextend (f32 VR32:$src))), + def : Pat<(f128 (any_fpextend (f32 VR32:$src))), (WFLLD (WLDEB VR32:$src))>; } // Load rounded. - def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; - def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; - def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; - def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>; + def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>; let Predicates = [FeatureVectorEnhancements1] in { - let isAsmParserOnly = 1 in { - def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; - def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; - def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in { + def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; + def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } + def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; } - def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; - def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>; - def : Pat<(f32 (fpround (f128 VR128:$src))), + def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>; + def : Pat<(f32 (any_fpround (f128 VR128:$src))), (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>; } // Maximum. multiclass VectorMax<Instruction insn, TypedReg tr> { - def : FPMinMax<insn, fmaxnum, tr, 4>; + def : FPMinMax<insn, any_fmaxnum, tr, 4>; def : FPMinMax<insn, fmaximum, tr, 1>; } let Predicates = [FeatureVectorEnhancements1] in { - def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; - def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, - v128db, v128db, 3, 0>; - def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, - v64db, v64db, 3, 8>; - def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, - v128sb, v128sb, 2, 0>; - def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, - v32sb, v32sb, 2, 8>; - def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, - v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; + def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, + v128db, v128db, 3, 0>; + def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, + v64db, v64db, 3, 8>; + def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, + v128sb, v128sb, 2, 0>; + def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, + v32sb, v32sb, 2, 8>; + def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, + v128xb, v128xb, 4, 8>; + } defm : VectorMax<VFMAXDB, v128db>; defm : VectorMax<WFMAXDB, v64db>; defm : VectorMax<VFMAXSB, v128sb>; @@ -1054,21 +1200,23 @@ let Predicates = [FeatureVector] in { // Minimum. multiclass VectorMin<Instruction insn, TypedReg tr> { - def : FPMinMax<insn, fminnum, tr, 4>; + def : FPMinMax<insn, any_fminnum, tr, 4>; def : FPMinMax<insn, fminimum, tr, 1>; } let Predicates = [FeatureVectorEnhancements1] in { - def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; - def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, - v128db, v128db, 3, 0>; - def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, - v64db, v64db, 3, 8>; - def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, - v128sb, v128sb, 2, 0>; - def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, - v32sb, v32sb, 2, 8>; - def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, - v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; + def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, + v128db, v128db, 3, 0>; + def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, + v64db, v64db, 3, 8>; + def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, + v128sb, v128sb, 2, 0>; + def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, + v32sb, v32sb, 2, 8>; + def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, + v128xb, v128xb, 4, 8>; + } defm : VectorMin<VFMINDB, v128db>; defm : VectorMin<WFMINDB, v64db>; defm : VectorMin<VFMINSB, v128sb>; @@ -1077,53 +1225,61 @@ let Predicates = [FeatureVector] in { } // Multiply. - def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; - def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; - def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; - def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; - def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>; + } } // Multiply and add. - def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; - def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; - def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; - def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; - def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>; + } } // Multiply and subtract. - def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; - def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; - def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; - def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; - def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>; + } } // Negative multiply and add. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; - def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>; - def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; - def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; - def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; - def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; + def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>; + def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>; } // Negative multiply and subtract. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; - def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>; - def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; - def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; - def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; - def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; + def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>; + def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>; } // Perform sign operation. @@ -1164,23 +1320,27 @@ let Predicates = [FeatureVector] in { } // Square root. - def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; - def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; - def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; - def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; - def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>; + } } // Subtract. - def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; - def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; - def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; - def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; - def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; + def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>; + } } // Test data class immediate. @@ -1202,7 +1362,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVector] in { // Compare scalar. - let Defs = [CC] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { @@ -1212,7 +1372,7 @@ let Predicates = [FeatureVector] in { } // Compare and signal scalar. - let Defs = [CC] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { @@ -1222,22 +1382,25 @@ let Predicates = [FeatureVector] in { } // Compare equal. - def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, - v128g, v128db, 3, 0>; - defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, - v64g, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, - v128f, v128sb, 2, 0>; - defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, - v32f, v32sb, 2, 8>; - defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, - v128q, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128g, v128db, 3, 0>; + defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128f, v128sb, 2, 0>; + defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } } // Compare and signal equal. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, @@ -1251,22 +1414,25 @@ let Predicates = [FeatureVector] in { } // Compare high. - def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; - defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, - v128g, v128db, 3, 0>; - defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, - v64g, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, - v128f, v128sb, 2, 0>; - defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, - v32f, v32sb, 2, 8>; - defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, - v128q, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128g, v128db, 3, 0>; + defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128f, v128sb, 2, 0>; + defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } } // Compare and signal high. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, @@ -1280,22 +1446,25 @@ let Predicates = [FeatureVector] in { } // Compare high or equal. - def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; - defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, - v128g, v128db, 3, 0>; - defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, - v64g, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, - v128f, v128sb, 2, 0>; - defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, - v32f, v32sb, 2, 8>; - defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, - v128q, v128xb, 4, 8>; + let Uses = [FPC], mayRaiseFPException = 1 in { + def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128g, v128db, 3, 0>; + defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128f, v128sb, 2, 0>; + defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } } // Compare and signal high or equal. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, @@ -1520,6 +1689,24 @@ let Predicates = [FeatureVector] in { z_vstrcz_cc, v128f, v128f, 2, 2>; } +let Predicates = [FeatureVectorEnhancements2] in { + defm VSTRS : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>; + defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B, + z_vstrs_cc, v128b, v128b, 0>; + defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B, + z_vstrs_cc, v128b, v128h, 1>; + defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B, + z_vstrs_cc, v128b, v128f, 2>; + let Defs = [CC] in { + def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B, + z_vstrsz_cc, v128b, v128b, 0, 2>; + def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B, + z_vstrsz_cc, v128b, v128h, 1, 2>; + def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B, + z_vstrsz_cc, v128b, v128f, 2, 2>; + } +} + //===----------------------------------------------------------------------===// // Packed-decimal instructions //===----------------------------------------------------------------------===// @@ -1531,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDecimal] in { def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>; let Defs = [CC] in { + let Predicates = [FeatureVectorPackedDecimalEnhancement] in { + def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>; + def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>; + } def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>; def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>; def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp index f532e9e23b1f..06d893d043e9 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -1,9 +1,8 @@ //===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 802962bd4db0..95d7e22dec32 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -1,9 +1,8 @@ //===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp index 2655e4866b20..ef39f80a94ef 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -1,9 +1,8 @@ //===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h index 7173cfa42959..14ad06488312 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -1,9 +1,8 @@ //===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp index 1a7c0d7f687a..9b6aa3593ce0 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp @@ -1,9 +1,8 @@ //=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 4f64f4c65f1d..9eec3f37bc28 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -1,9 +1,8 @@ //=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 98e761ef87fe..0becfaa1d49c 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -1,9 +1,8 @@ //-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h index ab820e5d3e63..0d5cc2e03e8d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -1,9 +1,8 @@ //==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td index 7bf32bf19a4a..56632e1529a2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -1,9 +1,8 @@ //===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -189,6 +188,17 @@ def HF32 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; +// Negated variants. +def NEGLH16 : SDNodeXForm<imm, [{ + uint64_t Value = (-N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +def NEGLF32 : SDNodeXForm<imm, [{ + uint64_t Value = -N->getZExtValue() & 0x00000000FFFFFFFFULL; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + // Truncate an immediate to a 8-bit signed quantity. def SIMM8 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), @@ -431,6 +441,15 @@ def imm64hf32c : Immediate<i64, [{ return SystemZ::isImmHF(uint64_t(~N->getZExtValue())); }], HF32, "U32Imm">; +// Negated immediates that fit LF32 or LH16. +def imm64lh16n : Immediate<i64, [{ + return SystemZ::isImmLH(uint64_t(-N->getZExtValue())); +}], NEGLH16, "U16Imm">; + +def imm64lf32n : Immediate<i64, [{ + return SystemZ::isImmLF(uint64_t(-N->getZExtValue())); +}], NEGLF32, "U32Imm">; + // Short immediates. def imm64sx8 : Immediate<i64, [{ return isInt<8>(N->getSExtValue()); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td index 626675bfb70c..15bd12bc98a4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -1,9 +1,8 @@ //===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -192,6 +191,12 @@ def SDT_ZVecTernary : SDTypeProfile<1, 3, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -279,6 +284,10 @@ def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>; @@ -338,6 +347,10 @@ def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryIntCC>; def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", SDT_ZVecQuaternaryIntCC>; +def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC", + SDT_ZVecTernaryConvCC>; +def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC", + SDT_ZVecTernaryConvCC>; def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>; class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> @@ -662,22 +675,34 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2), [(z_usubo node:$src1, node:$src2), (sub node:$src1, node:$src2)]>; +// Combined logical operations. +def andc : PatFrag<(ops node:$src1, node:$src2), + (and node:$src1, (not node:$src2))>; +def orc : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, (not node:$src2))>; +def nand : PatFrag<(ops node:$src1, node:$src2), + (not (and node:$src1, node:$src2))>; +def nor : PatFrag<(ops node:$src1, node:$src2), + (not (or node:$src1, node:$src2))>; +def nxor : PatFrag<(ops node:$src1, node:$src2), + (not (xor node:$src1, node:$src2))>; + // Fused multiply-subtract, using the natural operand order. -def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src1, node:$src2, (fneg node:$src3))>; +def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src1, node:$src2, (fneg node:$src3))>; // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. -def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src2, node:$src3, node:$src1)>; -def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src2, node:$src3, (fneg node:$src1))>; +def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, node:$src1)>; +def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, (fneg node:$src1))>; // Negative fused multiply-add and multiply-subtract. -def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fneg (fma node:$src1, node:$src2, node:$src3))>; -def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fneg (fms node:$src1, node:$src2, node:$src3))>; +def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fma node:$src1, node:$src2, node:$src3))>; +def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fms node:$src1, node:$src2, node:$src3))>; // Floating-point negative absolute. def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; @@ -709,9 +734,9 @@ class shiftop<SDPatternOperator operator> [(operator node:$val, node:$count), (operator node:$val, (and node:$count, imm32bottom6set))]>; -// Vector representation of all-zeros and all-ones. -def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; -def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; +def imm32mod64 : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() % 64 == 0); +}]>; // Load a scalar and replicate it in all elements of a vector. class z_replicate_load<ValueType scalartype, SDPatternOperator load> @@ -723,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_load<i32, load>; def z_replicate_loadi64 : z_replicate_load<i64, load>; def z_replicate_loadf32 : z_replicate_load<f32, load>; def z_replicate_loadf64 : z_replicate_load<f64, load>; +// Byte-swapped replicated vector element loads. +def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>; +def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>; +def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>; // Load a scalar and insert it into a single element of a vector. class z_vle<ValueType scalartype, SDPatternOperator load> @@ -735,18 +764,22 @@ def z_vlei32 : z_vle<i32, load>; def z_vlei64 : z_vle<i64, load>; def z_vlef32 : z_vle<f32, load>; def z_vlef64 : z_vle<f64, load>; +// Byte-swapped vector element loads. +def z_vlebri16 : z_vle<i32, z_loadbswap16>; +def z_vlebri32 : z_vle<i32, z_loadbswap32>; +def z_vlebri64 : z_vle<i64, z_loadbswap64>; // Load a scalar and insert it into the low element of the high i64 of a // zeroed vector. class z_vllez<ValueType scalartype, SDPatternOperator load, int index> : PatFrag<(ops node:$addr), - (z_vector_insert (z_vzero), + (z_vector_insert immAllZerosV, (scalartype (load node:$addr)), (i32 index))>; def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>; def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>; def z_vllezi32 : z_vllez<i32, load, 1>; def z_vllezi64 : PatFrags<(ops node:$addr), - [(z_vector_insert (z_vzero), + [(z_vector_insert immAllZerosV, (i64 (load node:$addr)), (i32 0)), (z_join_dwords (i64 (load node:$addr)), (i64 0))]>; // We use high merges to form a v4f32 from four f32s. Propagating zero @@ -759,11 +792,12 @@ def z_vllezf32 : PatFrag<(ops node:$addr), (bitconvert (v4f32 (scalar_to_vector (f32 (load node:$addr)))))))), - (v2i64 (z_vzero)))>; + (v2i64 + (bitconvert (v4f32 immAllZerosV))))>; def z_vllezf64 : PatFrag<(ops node:$addr), (z_merge_high (v2f64 (scalar_to_vector (f64 (load node:$addr)))), - (z_vzero))>; + immAllZerosV)>; // Similarly for the high element of a zeroed vector. def z_vllezli32 : z_vllez<i32, load, 0>; @@ -774,8 +808,21 @@ def z_vllezlf32 : PatFrag<(ops node:$addr), (z_merge_high (v4f32 (scalar_to_vector (f32 (load node:$addr)))), - (v4f32 (z_vzero))))), - (v2i64 (z_vzero)))>; + (v4f32 immAllZerosV)))), + (v2i64 + (bitconvert (v4f32 immAllZerosV))))>; + +// Byte-swapped variants. +def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>; +def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>; +def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>; +def z_vllebrzi64 : PatFrags<(ops node:$addr), + [(z_vector_insert immAllZerosV, + (i64 (z_loadbswap64 node:$addr)), + (i32 0)), + (z_join_dwords (i64 (z_loadbswap64 node:$addr)), + (i64 0))]>; + // Store one element of a vector. class z_vste<ValueType scalartype, SDPatternOperator store> @@ -788,18 +835,22 @@ def z_vstei32 : z_vste<i32, store>; def z_vstei64 : z_vste<i64, store>; def z_vstef32 : z_vste<f32, store>; def z_vstef64 : z_vste<f64, store>; +// Byte-swapped vector element stores. +def z_vstebri16 : z_vste<i32, z_storebswap16>; +def z_vstebri32 : z_vste<i32, z_storebswap32>; +def z_vstebri64 : z_vste<i64, z_storebswap64>; // Arithmetic negation on vectors. -def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; +def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>; // Bitwise negation on vectors. -def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>; +def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>; // Signed "integer greater than zero" on vectors. -def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>; +def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>; // Signed "integer less than zero" on vectors. -def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>; +def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>; // Integer absolute on vectors. class z_viabs<int shift> diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td index 152521fb66a8..beaf4de285a3 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -1,9 +1,8 @@ //===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp new file mode 100644 index 000000000000..8e4060eac74c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -0,0 +1,124 @@ +//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that is run immediately after VirtRegRewriter +// but before MachineCopyPropagation. The purpose is to lower pseudos to +// target instructions before any later pass might substitute a register for +// another. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass" + +#define DEBUG_TYPE "systemz-postrewrite" +STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); + +namespace llvm { + void initializeSystemZPostRewritePass(PassRegistry&); +} + +namespace { + +class SystemZPostRewrite : public MachineFunctionPass { +public: + static char ID; + SystemZPostRewrite() : MachineFunctionPass(ID) { + initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool selectMBB(MachineBasicBlock &MBB); +}; + +char SystemZPostRewrite::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite", + SYSTEMZ_POSTREWRITE_NAME, false, false) + +/// Returns an instance of the Post Rewrite pass. +FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { + return new SystemZPostRewrite(); +} + +/// If MBBI references a pseudo instruction that should be selected here, +/// do it and return true. Otherwise return false. +bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + + // Note: If this could be done during regalloc in foldMemoryOperandImpl() + // while also updating the LiveIntervals, there would be no need for the + // MemFoldPseudo to begin with. + int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode); + if (TargetMemOpcode != -1) { + MI.setDesc(TII->get(TargetMemOpcode)); + MI.tieOperands(0, 1); + unsigned DstReg = MI.getOperand(0).getReg(); + MachineOperand &SrcMO = MI.getOperand(1); + if (DstReg != SrcMO.getReg()) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg) + .addReg(SrcMO.getReg()); + SrcMO.setReg(DstReg); + MemFoldCopies++; + } + return true; + } + + return false; +} + +/// Iterate over the instructions in basic block MBB and select any +/// pseudo instructions. Return true if anything was modified. +bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= selectMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= selectMBB(MBB); + + return Modified; +} + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td index 0dca4582dc0d..b27c25beb58c 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -1,9 +1,8 @@ //===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -36,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>; def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>; def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>; +def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>; + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index e9f9188048da..e7cd6871dbb4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -1,9 +1,8 @@ //===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -54,6 +53,26 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO, return RC; } +// Pass the registers of RC as hints while making sure that if any of these +// registers are copy hints (and therefore already in Hints), hint them +// first. +static void addHints(ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const TargetRegisterClass *RC, + const MachineRegisterInfo *MRI) { + SmallSet<unsigned, 4> CopyHints; + CopyHints.insert(Hints.begin(), Hints.end()); + Hints.clear(); + for (MCPhysReg Reg : Order) + if (CopyHints.count(Reg) && + RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); + for (MCPhysReg Reg : Order) + if (!CopyHints.count(Reg) && + RC->contains(Reg) && !MRI->isReserved(Reg)) + Hints.push_back(Reg); +} + bool SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, @@ -62,7 +81,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); @@ -76,31 +96,23 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!DoneRegs.insert(Reg).second) continue; - for (auto &Use : MRI->use_instructions(Reg)) + for (auto &Use : MRI->reg_instructions(Reg)) { // For LOCRMux, see if the other operand is already a high or low - // register, and in that case give the correpsonding hints for + // register, and in that case give the corresponding hints for // VirtReg. LOCR instructions need both operands in either high or - // low parts. - if (Use.getOpcode() == SystemZ::LOCRMux) { + // low parts. Same handling for SELRMux. + if (Use.getOpcode() == SystemZ::LOCRMux || + Use.getOpcode() == SystemZ::SELRMux) { MachineOperand &TrueMO = Use.getOperand(1); MachineOperand &FalseMO = Use.getOperand(2); const TargetRegisterClass *RC = TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), getRC32(TrueMO, VRM, MRI)); + if (Use.getOpcode() == SystemZ::SELRMux) + RC = TRI->getCommonSubClass(RC, + getRC32(Use.getOperand(0), VRM, MRI)); if (RC && RC != &SystemZ::GRX32BitRegClass) { - // Pass the registers of RC as hints while making sure that if - // any of these registers are copy hints, hint them first. - SmallSet<unsigned, 4> CopyHints; - CopyHints.insert(Hints.begin(), Hints.end()); - Hints.clear(); - for (MCPhysReg Reg : Order) - if (CopyHints.count(Reg) && - RC->contains(Reg) && !MRI->isReserved(Reg)) - Hints.push_back(Reg); - for (MCPhysReg Reg : Order) - if (!CopyHints.count(Reg) && - RC->contains(Reg) && !MRI->isReserved(Reg)) - Hints.push_back(Reg); + addHints(Order, Hints, RC, MRI); // Return true to make these hints the only regs available to // RA. This may mean extra spilling but since the alternative is // a jump sequence expansion of the LOCRMux, it is preferred. @@ -112,10 +124,70 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) Worklist.push_back(OtherReg); - } + } // end LOCRMux + else if (Use.getOpcode() == SystemZ::CHIMux || + Use.getOpcode() == SystemZ::CFIMux) { + if (Use.getOperand(1).getImm() == 0) { + bool OnlyLMuxes = true; + for (MachineInstr &DefMI : MRI->def_instructions(VirtReg)) + if (DefMI.getOpcode() != SystemZ::LMux) + OnlyLMuxes = false; + if (OnlyLMuxes) { + addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI); + // Return false to make these hints preferred but not obligatory. + return false; + } + } + } // end CHIMux / CFIMux + } } } + if (VRM == nullptr) + return BaseImplRetVal; + + // Add any two address hints after any copy hints. + SmallSet<unsigned, 4> TwoAddrHints; + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) + if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { + const MachineOperand *VRRegMO = nullptr; + const MachineOperand *OtherMO = nullptr; + const MachineOperand *CommuMO = nullptr; + if (VirtReg == Use.getOperand(0).getReg()) { + VRRegMO = &Use.getOperand(0); + OtherMO = &Use.getOperand(1); + if (Use.isCommutable()) + CommuMO = &Use.getOperand(2); + } else if (VirtReg == Use.getOperand(1).getReg()) { + VRRegMO = &Use.getOperand(1); + OtherMO = &Use.getOperand(0); + } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) { + VRRegMO = &Use.getOperand(2); + OtherMO = &Use.getOperand(0); + } else + continue; + + auto tryAddHint = [&](const MachineOperand *MO) -> void { + Register Reg = MO->getReg(); + Register PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + if (PhysReg) { + if (MO->getSubReg()) + PhysReg = getSubReg(PhysReg, MO->getSubReg()); + if (VRRegMO->getSubReg()) + PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), + MRI->getRegClass(VirtReg)); + if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) + TwoAddrHints.insert(PhysReg); + } + }; + tryAddHint(OtherMO); + if (CommuMO) + tryAddHint(CommuMO); + } + for (MCPhysReg OrderReg : Order) + if (TwoAddrHints.count(OrderReg)) + Hints.push_back(OrderReg); + return BaseImplRetVal; } @@ -169,6 +241,9 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(SystemZ::A0); Reserved.set(SystemZ::A1); + // FPC is the floating-point control register. + Reserved.set(SystemZ::FPC); + return Reserved; } @@ -328,7 +403,7 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, return true; } -unsigned +Register SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const SystemZFrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 9fd2e4ae4f00..4f721ec23e53 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -1,9 +1,8 @@ //===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -84,7 +83,7 @@ public: const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override; - unsigned getFrameRegister(const MachineFunction &MF) const override; + Register getFrameRegister(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index cea88c088b86..3567b0f3acf8 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -1,9 +1,8 @@ //==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -296,6 +295,13 @@ def CC : SystemZReg<"cc">; let isAllocatable = 0, CopyCost = -1 in def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>; +// The floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags and DXC are not modeled here. +def FPC : SystemZReg<"fpc">; +let isAllocatable = 0 in + def FPCRegs : RegisterClass<"SystemZ", [i32], 32, (add FPC)>; + // Access registers. class ACR32<bits<16> num, string n> : SystemZReg<n> { let HWEncoding = num; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td index 83bf97e6841a..98eca2802242 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -1,9 +1,8 @@ //==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -60,6 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit def MCD : SchedWrite; // Millicode +include "SystemZScheduleArch13.td" include "SystemZScheduleZ14.td" include "SystemZScheduleZ13.td" include "SystemZScheduleZEC12.td" diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleArch13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleArch13.td new file mode 100644 index 000000000000..9f82f24d0e8f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleArch13.td @@ -0,0 +1,1695 @@ +//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Arch13 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Arch13Model : SchedMachineModel { + + let UnsupportedFeatures = Arch13UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Arch13Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes<NormalGr, []>; + def : WriteRes<BeginGroup, []> { let BeginGroup = 1; } + def : WriteRes<EndGroup, []> { let EndGroup = 1; } +} +def : WriteRes<Cracked, []> { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes<GroupAlone, []> { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone2, []> { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes<GroupAlone3, []> { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance<RegReadAdv, 4>; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; } +} + +// Execution units. +def Arch13_FXaUnit : ProcResource<2>; +def Arch13_FXbUnit : ProcResource<2>; +def Arch13_LSUnit : ProcResource<2>; +def Arch13_VecUnit : ProcResource<2>; +def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Arch13_VBUnit : ProcResource<2>; +def Arch13_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes<FXa, [Arch13_FXaUnit]>; + def : WriteRes<FXb, [Arch13_FXbUnit]>; + def : WriteRes<LSU, [Arch13_LSUnit]>; + def : WriteRes<VecBF, [Arch13_VecUnit]>; + def : WriteRes<VecDF, [Arch13_VecUnit]>; + def : WriteRes<VecDFX, [Arch13_VecUnit]>; + def : WriteRes<VecMul, [Arch13_VecUnit]>; + def : WriteRes<VecStr, [Arch13_VecUnit]>; + def : WriteRes<VecXsPm, [Arch13_VecUnit]>; + foreach Num = 2-5 in { let ResourceCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Arch13_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Arch13_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Arch13_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Arch13_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Arch13_VecFPdUnit]> { let ResourceCycles = [30]; } + + def : WriteRes<VBU, [Arch13_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Arch13_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. +def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], + (instregex "LTXBR(Compare)?$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], + (instregex "VCVB(G)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone], + (instregex "VCVD(G)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPI$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +} + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index 74e1dad87908..b3266051da4e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -1,9 +1,8 @@ //-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -1192,8 +1191,8 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; // Vector: Loads //===----------------------------------------------------------------------===// -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(BB)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; @@ -1201,16 +1200,17 @@ def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], (instregex "VLE(B|F|G|H)$")>; def : InstRW<[WLat6LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], (instregex "VGE(F|G)$")>; -def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; //===----------------------------------------------------------------------===// // Vector: Stores //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; -def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index 1962fdf3a1d1..df7282a2961b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -1,9 +1,8 @@ //-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -1210,8 +1209,8 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; // Vector: Loads //===----------------------------------------------------------------------===// -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(BB)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; @@ -1219,17 +1218,18 @@ def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], (instregex "VLE(B|F|G|H)$")>; def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], (instregex "VGE(F|G)$")>; -def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; //===----------------------------------------------------------------------===// // Vector: Stores //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; -def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 7535739f813a..ca714ef1a702 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -1,9 +1,8 @@ //=- SystemZScheduleZ196.td - SystemZ Scheduling Definitions ---*- tblgen -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index a21d2c4cef70..fb226be678da 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -1,9 +1,8 @@ //=- SystemZScheduleZEC12.td - SystemZ Scheduling Definitions --*- tblgen -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 4592e82eea71..a50e6aa59711 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -1,9 +1,8 @@ //===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 93cd970c30c6..7d63bae83cf3 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -1,9 +1,8 @@ //===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index 195fa20a2c90..e79dfc5b4b9e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -1,9 +1,8 @@ //===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -47,6 +46,7 @@ private: bool shortenOn001(MachineInstr &MI, unsigned Opcode); bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); bool shortenFPConv(MachineInstr &MI, unsigned Opcode); + bool shortenSelect(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -176,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { return false; } +// MI is a three-operand select instruction. If one of the sources match +// the destination, convert to the equivalent load-on-condition. +bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) { + if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { + MI.setDesc(TII->get(Opcode)); + MI.tieOperands(0, 1); + return true; + } + if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { + TII->commuteInstruction(MI, false, 1, 2); + MI.setDesc(TII->get(Opcode)); + MI.tieOperands(0, 1); + return true; + } + return false; +} + // Process all instructions in MBB. Return true if something changed. bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -196,6 +213,18 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); break; + case SystemZ::SELR: + Changed |= shortenSelect(MI, SystemZ::LOCR); + break; + + case SystemZ::SELFHR: + Changed |= shortenSelect(MI, SystemZ::LOCFHR); + break; + + case SystemZ::SELGR: + Changed |= shortenSelect(MI, SystemZ::LOCGR); + break; + case SystemZ::WFADB: Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; @@ -300,6 +329,31 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { case SystemZ::VST64: Changed |= shortenOn0(MI, SystemZ::STD); break; + + default: { + int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode()); + if (TwoOperandOpcode == -1) + break; + + if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) && + (!MI.isCommutable() || + MI.getOperand(0).getReg() != MI.getOperand(2).getReg() || + !TII->commuteInstruction(MI, false, 1, 2))) + break; + + MI.setDesc(TII->get(TwoOperandOpcode)); + MI.tieOperands(0, 1); + if (TwoOperandOpcode == SystemZ::SLL || + TwoOperandOpcode == SystemZ::SLA || + TwoOperandOpcode == SystemZ::SRL || + TwoOperandOpcode == SystemZ::SRA) { + // These shifts only use the low 6 bits of the shift count. + MachineOperand &ImmMO = MI.getOperand(3); + ImmMO.setImm(ImmMO.getImm() & 0xfff); + } + Changed = true; + break; + } } LiveRegs.stepBackward(MI); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index fb030a207bc7..5e8af81842c4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -1,9 +1,8 @@ //===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -56,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false), HasVectorEnhancements1(false), HasVectorPackedDecimal(false), HasInsertReferenceBitsMultiple(false), + HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false), + HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false), + HasEnhancedSort(false), HasDeflateConversion(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h index cb6b21a1d465..fa3f65d93c91 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -1,9 +1,8 @@ //===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -63,6 +62,12 @@ protected: bool HasVectorEnhancements1; bool HasVectorPackedDecimal; bool HasInsertReferenceBitsMultiple; + bool HasMiscellaneousExtensions3; + bool HasMessageSecurityAssist9; + bool HasVectorEnhancements2; + bool HasVectorPackedDecimalEnhancement; + bool HasEnhancedSort; + bool HasDeflateConversion; private: Triple TargetTriple; @@ -210,6 +215,30 @@ public: return HasInsertReferenceBitsMultiple; } + // Return true if the target has the miscellaneous-extensions facility 3. + bool hasMiscellaneousExtensions3() const { + return HasMiscellaneousExtensions3; + } + + // Return true if the target has the message-security-assist + // extension facility 9. + bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; } + + // Return true if the target has the vector-enhancements facility 2. + bool hasVectorEnhancements2() const { return HasVectorEnhancements2; } + + // Return true if the target has the vector-packed-decimal + // enhancement facility. + bool hasVectorPackedDecimalEnhancement() const { + return HasVectorPackedDecimalEnhancement; + } + + // Return true if the target has the enhanced-sort facility. + bool hasEnhancedSort() const { return HasEnhancedSort; } + + // Return true if the target has the deflate-conversion facility. + bool hasDeflateConversion() const { return HasDeflateConversion; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp index 5dbd23d420a3..478848c30701 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -1,9 +1,8 @@ //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -356,8 +355,8 @@ bool SystemZTDCPass::runOnFunction(Function &F) { if (!Worthy) continue; // Call the intrinsic, compare result with 0. - Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, - V->getType()); + Function *TDCFunc = + Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType()); IRBuilder<> IRB(I); Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 9596a2b6388d..5c49e6eff0bf 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -1,9 +1,8 @@ //===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -12,6 +11,7 @@ #include "SystemZ.h" #include "SystemZMachineScheduler.h" #include "SystemZTargetTransformInfo.h" +#include "TargetInfo/SystemZTargetInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -133,9 +133,9 @@ getEffectiveSystemZCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM, bool JIT) { if (CM) { if (*CM == CodeModel::Tiny) - report_fatal_error("Target does not support the tiny CodeModel"); + report_fatal_error("Target does not support the tiny CodeModel", false); if (*CM == CodeModel::Kernel) - report_fatal_error("Target does not support the kernel CodeModel"); + report_fatal_error("Target does not support the kernel CodeModel", false); return *CM; } if (JIT) @@ -183,6 +183,7 @@ public: void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPostRewrite() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -212,7 +213,16 @@ bool SystemZPassConfig::addILPOpts() { return true; } +void SystemZPassConfig::addPostRewrite() { + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPreSched2() { + // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() + // is not called). + if (getOptLevel() == CodeGenOpt::None) + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); + addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); if (getOptLevel() != CodeGenOpt::None) diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index 52bf8bba55de..ac04a080f580 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -1,9 +1,8 @@ //=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 129610fe095b..145cf87ef9f5 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -1,9 +1,8 @@ //===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -467,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCost( if (Opcode == Instruction::FRem) return LIBCALL_COST; + // Give discount for some combined logical operations if supported. + if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) { + if (Opcode == Instruction::Xor) { + for (const Value *A : Args) { + if (const Instruction *I = dyn_cast<Instruction>(A)) + if (I->hasOneUse() && + (I->getOpcode() == Instruction::And || + I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::Xor)) + return 0; + } + } + else if (Opcode == Instruction::Or || Opcode == Instruction::And) { + for (const Value *A : Args) { + if (const Instruction *I = dyn_cast<Instruction>(A)) + if (I->hasOneUse() && I->getOpcode() == Instruction::Xor) + return 0; + } + } + } + // Or requires one instruction, although it has custom handling for i64. if (Opcode == Instruction::Or) return 1; @@ -687,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // TODO: Fix base implementation which could simplify things a bit here // (seems to miss on differentiating on scalar/vector types). - // Only 64 bit vector conversions are natively supported. - if (DstScalarBits == 64) { - if (SrcScalarBits == 64) + // Only 64 bit vector conversions are natively supported before arch13. + if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) { + if (SrcScalarBits == DstScalarBits) return NumDstVectors; if (SrcScalarBits == 1) @@ -857,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case Instruction::Select: if (ValTy->isFloatingPointTy()) return 4; // No load on condition for FP - costs a conditional jump. - return 1; // Load On Condition. + return 1; // Load On Condition / Select Register. } } @@ -1010,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src)); // Store/Load reversed saves one instruction. - if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) { + if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) && + I != nullptr) { if (Opcode == Instruction::Load && I->hasOneUse()) { const Instruction *LdUser = cast<Instruction>(*I->user_begin()); // In case of load -> bswap -> store, return normal cost for the load. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index e79bee1ea3a8..16ce2ef1d7a0 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -1,9 +1,8 @@ //===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp index e2b9efd35d3e..713a55ee8400 100644 --- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -1,13 +1,12 @@ //===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "SystemZ.h" +#include "TargetInfo/SystemZTargetInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h new file mode 100644 index 000000000000..cad141c81e6b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h @@ -0,0 +1,20 @@ +//===-- SystemZTargetInfo.h - SystemZ target implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheSystemZTarget(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H |